def __init__(self, dataset):
  """Builds an iterator over `dataset`.

  For example:

  ```python
  dataset = tf.data.Dataset.range(4)
  for x in Iterator(dataset):
    print(x)
  ```

  Tensors produced will be placed on the device on which this iterator object
  was created.

  Args:
    dataset: A `tf.data.Dataset` object.

  Raises:
    TypeError: If `dataset` is an unsupported type.
    RuntimeError: When invoked without eager execution enabled.
  """
  # `is_remote_device` is False when the current device type is unset/empty;
  # otherwise it is True exactly when the device type is not "CPU".
  device_type = context.context().device_spec.device_type
  is_remote_device = bool(device_type) and device_type != "CPU"

  if is_remote_device:
    # Let the placer figure out where to place the various functions etc.
    # created by the CopyToDeviceDataset.
    with ops.device(None):
      copy_fn = prefetching_ops.copy_to_device(context.context().device_name)
      dataset = dataset.apply(copy_fn)
      dataset = dataset.prefetch(1)

  super(Iterator, self).__init__(dataset)
def set_visible_devices(devices, device_type=None):
  """Sets the list of visible devices.

  Sets the list of PhysicalDevices to be marked as visible to the runtime.
  For any device that is not marked as visible, TensorFlow will not allocate
  memory on it and will not be able to place any operations on it, as no
  LogicalDevice will be created on it. By default all discovered devices are
  marked as visible.

  The following example demonstrates disabling the first GPU on the machine.

  ```python
  physical_devices = config.experimental.list_physical_devices('GPU')
  assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
  # Disable first GPU
  tf.config.experimental.set_visible_devices(physical_devices[1:], 'GPU')
  logical_devices = config.experimental.list_logical_devices('GPU')
  # Logical device was not created for first GPU
  assert len(logical_devices) == len(physical_devices) - 1
  ```

  Args:
    devices: List of PhysicalDevice objects to make visible.
    device_type: (optional) Device types to limit visibility configuration to.
      Other device types will be left unaltered.
  """
  # Pure delegation: the global context object owns the visibility state.
  ctx = context.context()
  ctx.set_visible_devices(devices, device_type)
def set_optimizer_experimental_options(options):
  """Sets experimental optimizer options.

  Note that optimizations are only applied in graph mode (within tf.function).
  In addition, as these are experimental options, the list is subject to
  change.

  Args:
    options: Dictionary of experimental optimizer options to configure.
      Valid keys:
      - layout_optimizer: Optimize tensor layouts, e.g. this will try to use
        NCHW layout on GPU, which is faster.
      - constant_folding: Fold constants. Statically infer the value of
        tensors when possible, and materialize the result using constants.
      - shape_optimization: Simplify computations made on shapes.
      - remapping: Remap subgraphs onto more efficient implementations.
      - arithmetic_optimization: Simplify arithmetic ops with common
        sub-expression elimination and arithmetic simplification.
      - dependency_optimization: Control dependency optimizations. Remove
        redundant control dependencies, which may enable other optimization.
        This optimizer is also essential for pruning Identity and NoOp nodes.
      - loop_optimization: Loop optimizations.
      - function_optimization: Function optimizations and inlining.
      - debug_stripper: Strips debug-related nodes from the graph.
      - disable_model_pruning: Disable removal of unnecessary ops from the
        graph.
      - scoped_allocator_optimization: Try to allocate some independent Op
        outputs contiguously in order to merge or eliminate downstream Ops.
      - pin_to_host_optimization: Force small ops onto the CPU.
      - implementation_selector: Enable the swap of kernel implementations
        based on the device placement.
      - disable_meta_optimizer: Disable the entire meta optimizer.
      - min_graph_nodes: The minimum number of nodes in a graph to optimize.
        For smaller graphs, optimization is skipped.
  """
  # Pure delegation: the options are forwarded unchanged to the context.
  ctx = context.context()
  ctx.set_optimizer_experimental_options(options)
def testGpuInvalidConfig(self):
  """Exercises memory growth vs. virtual device configuration on GPUs."""
  gpus = config.list_physical_devices('GPU')
  self.assertNotEqual(len(gpus), 0)

  # Turn on memory growth everywhere and confirm the config reflects it.
  for gpu in gpus:
    config.set_memory_growth(gpu, True)
  self.assertTrue(context.context().config.gpu_options.allow_growth)

  last_gpu = gpus[-1]

  # Virtual devices created without a memory limit must be rejected ...
  unlimited = [
      context.VirtualDeviceConfiguration(),
      context.VirtualDeviceConfiguration()
  ]
  with self.assertRaisesRegexp(ValueError, 'memory limit'):
    config.set_virtual_device_configuration(last_gpu, unlimited)

  # ... and the rejected call must not have stored any configuration.
  self.assertIsNone(config.get_virtual_device_configuration(last_gpu))

  limited = [
      context.VirtualDeviceConfiguration(memory_limit=10),
      context.VirtualDeviceConfiguration(memory_limit=10)
  ]
  config.set_virtual_device_configuration(last_gpu, limited)
  self.assertFalse(context.context().config.gpu_options.allow_growth)

  # Once virtual devices are configured, memory growth can no longer be set.
  with self.assertRaisesRegexp(ValueError, 'virtual devices'):
    config.set_memory_growth(last_gpu, False)
def testBadConstructorArgs(self):
  """Checks the TypeErrors raised for malformed `ops.EagerTensor` arguments."""
  ctx = context.context()
  ctx_handle = ctx._handle
  device_name = ctx.device_name

  def expect_type_error(pattern):
    # Every case below expects a TypeError; only the message pattern varies.
    return self.assertRaisesRegexp(TypeError, pattern)

  # Missing context.
  with expect_type_error(r"Required argument 'context' \(pos 2\) not found"):
    ops.EagerTensor(1, device=device_name)

  # Missing device.
  with expect_type_error(r"Required argument 'device' \(pos 3\) not found"):
    ops.EagerTensor(1, context=ctx_handle)

  # Bad dtype type.
  with expect_type_error("Expecting a DataType value for dtype. Got"):
    ops.EagerTensor(1, context=ctx_handle, device=device_name, dtype="1")

  # Following errors happen when trying to copy to GPU.
  if not context.context().num_gpus():
    self.skipTest("No GPUs found")

  with ops.device("/device:GPU:0"):
    device_name = ctx.device_name

    # Bad context.
    with expect_type_error(
        "Expecting a PyCapsule encoded context handle. Got"):
      ops.EagerTensor(1.0, context=1, device=device_name)

    # Bad device.
    with expect_type_error("Error parsing device argument to CopyToDevice"):
      ops.EagerTensor(1.0, context=ctx_handle, device=1)
def testJit(self):
  """Toggles the optimizer JIT flag and runs a traced function each time."""
  self.assertEqual(config.get_optimizer_jit(), False)

  # The following function should cause Op fusion to occur. However, there is
  # unfortunately no straightforward way to ensure this. We will just have to
  # settle for creating a test that can trigger JIT.
  @def_function.function
  def fun(a, b):
    c = a * b
    d = c + a
    return d

  a = constant_op.constant([2., 2.])
  b = constant_op.constant([2., 2.])

  # Baseline run with the initial (disabled) setting.
  self.evaluate(fun(a, b))

  # Enable, then disable again; after each switch the public getter and the
  # context attribute must agree, and the function must still evaluate.
  for jit_enabled in (True, False):
    config.set_optimizer_jit(jit_enabled)
    self.assertEqual(config.get_optimizer_jit(), jit_enabled)
    self.assertEqual(config.get_optimizer_jit(),
                     context.context().optimizer_jit)
    self.evaluate(fun(a, b))
def testSoftPlacement(self):
  """Checks that the soft device placement setting is honored."""
  # The expected initial value depends on whether we are executing eagerly.
  if context.executing_eagerly():
    check_initial = self.assertTrue
  else:
    check_initial = self.assertFalse
  check_initial(config.get_soft_device_placement())

  @def_function.function
  def mod():
    with ops.device('/device:GPU:0'):
      a = constant_op.constant(1.0)
      b = constant_op.constant(1.0)
      return math_ops.mod(a, b)

  def set_and_verify(enabled):
    # The public getter and the context attribute must both reflect the value.
    config.set_soft_device_placement(enabled)
    self.assertEqual(config.get_soft_device_placement(), enabled)
    self.assertEqual(
        config.get_soft_device_placement(),
        context.context().soft_device_placement)

  set_and_verify(True)
  # Since soft placement is enabled, the mod operation should work with CPU
  mod()

  set_and_verify(False)
  # Since soft placement is disabled, the mod operation should fail on GPU
  with self.assertRaises(errors.InvalidArgumentError):
    mod()
def testCopyScope(self):
  """Adds to a tensor under a GPU device scope with the silent copy policy."""
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  # The constant is created before the GPU device scope is entered.
  constant = constant_op.constant(1.0)
  silent = context.DEVICE_PLACEMENT_SILENT
  with ops.device('gpu:0'), context.context().device_policy(silent):
    c = constant + 1.0

  self.assertAllEqual(c, 2.0)
def __del__(self):
  """Ends the context step if this object was created eagerly."""
  if not self._created_eagerly:
    return
  try:
    context.context().end_step()
  except (AttributeError, TypeError):
    # Best-effort cleanup: these two error types are deliberately ignored.
    # NOTE(review): presumably they occur when module globals have already
    # been torn down at interpreter shutdown -- confirm.
    pass
def __init__(self, dataset):
  """Creates a new iterator over the given dataset.

  For example:

  ```python
  dataset = tf.data.Dataset.range(4)
  for x in Iterator(dataset):
    print(x)
  ```

  Tensors produced will be placed on the device on which this iterator object
  was created.

  Args:
    dataset: A `tf.data.Dataset` object.

  Raises:
    TypeError: If `dataset` is an unsupported type. This includes datasets
      that are instances of `prefetching_ops._PrefetchToDeviceDataset`.
    RuntimeError: When invoked without eager execution enabled.
  """
  # Reject prefetch-to-device datasets up front; the message tells the user
  # which iteration style to use instead.
  if isinstance(dataset, prefetching_ops._PrefetchToDeviceDataset):  # pylint: disable=protected-access
    raise TypeError(
        "`tf.contrib.data.prefetch_to_device()` is not compatible with "
        "`tf.contrib.eager.Iterator`. Use `for ... in dataset:` to iterate "
        "over the dataset instead.")

  super(Iterator, self).__init__(dataset)

  # `is_remote_device` is False when the current device type is unset/empty;
  # otherwise it is True exactly when the device type is not "CPU".
  if not context.context().device_spec.device_type:
    is_remote_device = False
  else:
    is_remote_device = context.context().device_spec.device_type != "CPU"

  # Stays None unless a buffering resource is created below.
  self._buffer_resource_handle = None
  if is_remote_device:
    with ops.device("/device:CPU:0"):
      # String handle for this iterator's resource, created under the CPU
      # device scope.
      iter_string_handle = gen_dataset_ops.iterator_to_string_handle(
          self._resource)

      # Function that rebuilds an iterator from a string handle and returns
      # its next element.
      @function.Defun(dtypes.string)
      def remote_fn(h):
        remote_iterator = iterator_ops.Iterator.from_string_handle(
            h, self.output_types, self.output_shapes, self.output_classes)
        return remote_iterator.get_next()

      remote_fn.add_to_graph(None)
      target = constant_op.constant("/device:CPU:0")
    with ops.device(self._device):
      # Buffering resource created under `self._device`; it is given
      # `remote_fn`, the CPU target device and a buffer size of 10.
      # NOTE(review): presumably this prefetches elements produced on the CPU
      # onto `self._device` -- confirm against `function_buffering_resource`.
      self._buffer_resource_handle = prefetching_ops.function_buffering_resource(  # pylint: disable=line-too-long
          string_arg=iter_string_handle,
          output_types=self._flat_output_types,
          f=remote_fn,
          target_device=target,
          buffer_size=10,
          container="",
          shared_name=_generate_shared_name(
              "contrib_eager_iterator_function_buffer_resource"))
      # Deleter object tied to the buffering resource handle.
      # NOTE(review): presumably releases the resource when this iterator is
      # garbage collected -- confirm against `EagerResourceDeleter`.
      self._buffer_resource_deleter = resource_variable_ops.EagerResourceDeleter(  # pylint: disable=line-too-long
          handle=self._buffer_resource_handle,
          handle_device=self._device)
def testV1CompatibilityDummyInivisibleDeviceList(self):
  """Checks `visible_device_list` is preserved when no GPUs are present."""
  if config.list_physical_devices('GPU'):
    self.skipTest('Test requires no GPUs')

  # Ensure GPU options left untouched on CPU only environments
  ctx = context.context()
  ctx._physical_devices = None
  gpu_options = config_pb2.GPUOptions(visible_device_list='0')
  ctx._config = config_pb2.ConfigProto(gpu_options=gpu_options)

  # Re-reading the public `config` property must still report the value.
  visible = context.context().config.gpu_options.visible_device_list
  self.assertEqual(visible, '0')
def as_default(self):
  """Generator that makes this writer's resource the default while suspended.

  When `self._resource` is None this yields once and does nothing else.
  Otherwise the context's `summary_writer_resource` is replaced by
  `self._resource` for the duration of the `yield`. On normal completion the
  writer is flushed. The previous resource is restored in a `finally` block,
  so it is restored even if the body executed at the `yield` (or the flush
  itself) raises.

  Yields:
    None.
  """
  if self._resource is None:
    yield
    return

  old = context.context().summary_writer_resource
  try:
    context.context().summary_writer_resource = self._resource
    yield
    # Flushes the summary writer in eager mode or in graph functions, but not
    # in legacy graph mode (you're on your own there).
    with ops.device("cpu:0"):
      gen_summary_ops.flush_summary_writer(self._resource)
  finally:
    # Always reinstate the previous writer resource so that an exception
    # cannot leave this writer installed as the default.
    context.context().summary_writer_resource = old
def __init__(self, persistent=False):
  """Creates a new GradientTape.

  Args:
    persistent: Boolean controlling whether a persistent gradient tape is
      created. False by default, which means at most one call can be made to
      the gradient() method on this object.
  """
  # No tape exists yet and nothing is being recorded.
  self._tape, self._recording = None, False
  self._persistent = persistent
  # Notify the eager context that a step begins, after the attributes are set.
  context.context().start_step()
def testContextSwitchStackContainsEagerMode(self):
  """Checks the single context-switch entry recorded for eager mode."""
  # Eager execution has been enabled, and no other context switch has
  # occurred, so `context_switches` should contain exactly one entry.
  self.assertEqual(len(context.context().context_switches.stack), 1)
  only_entry = context.context().context_switches.stack[0]

  # The entry should log that eager mode was entered.
  self.assertIs(only_entry.enter_context_fn, context.eager_mode)

  # It is not possible to build a graph function when eager execution
  # is enabled; the stack entry should reflect this fact.
  self.assertFalse(only_entry.is_building_function)
def as_default(self):
  """Enables summary writing within a `with` block.

  When `self._resource` is None this yields `self` and does nothing else.
  Otherwise the context's `summary_writer_resource` is replaced by
  `self._resource` for the duration of the `yield`. On normal completion the
  writer is flushed. The previous resource is restored in a `finally` block,
  so it is restored even if the body of the `with` block (or the flush
  itself) raises.

  Yields:
    This writer object.
  """
  if self._resource is None:
    yield self
    return

  old = context.context().summary_writer_resource
  try:
    context.context().summary_writer_resource = self._resource
    yield self
    # Flushes the summary writer in eager mode or in graph functions, but not
    # in legacy graph mode (you're on your own there).
    self.flush()
  finally:
    # Always reinstate the previous writer resource so that an exception
    # cannot leave this writer installed as the default.
    context.context().summary_writer_resource = old
def testBenchmarks(self):
  """Runs the benchmarks once so CI catches breakages."""
  # This isn't actually a test, but benchmarks packaged as a test
  # so that continuous integration runs catch any breakages.
  print(context.context())
  benchmark_create_tensor(FLAGS.iters or 30000)

  # (matmul shape, iteration count used when FLAGS.iters is unset/zero).
  matmul_cases = (
      ((2, 2), 30000),
      ((100, 28 * 28), 1000),
  )
  for shape, default_iters in matmul_cases:
    benchmark_matmul(list(shape), FLAGS.iters or default_iters)

  if context.context().num_gpus() > 0:
    print("---- RUNNING ON GPU NOW ----")
    for shape, default_iters in matmul_cases:
      benchmark_matmul(list(shape), FLAGS.iters or default_iters,
                       use_gpu=True)
def _benchmark_tfe_py_execute_matmul(self, m, transpose_b, num_iters):
  """Benchmarks a direct `TFE_Py_Execute` call of "MatMul" on `m` with itself.

  Args:
    m: The tensor used as both MatMul inputs; its dtype fills the "T" attr.
    transpose_b: Value for the "transpose_b" attribute.
    num_iters: Iteration count forwarded to `self._run`.
  """
  operands = [m, m]
  # pylint: disable=protected-access
  handle = context.context()._handle
  # pylint: enable=protected-access
  device_name = context.context().device_name
  # Flat (name, value, name, value, ...) attribute tuple.
  op_attrs = (
      "transpose_a", False,
      "transpose_b", transpose_b,
      "T", m.dtype.as_datatype_enum,
  )

  def func():
    pywrap_tensorflow.TFE_Py_Execute(
        handle, device_name, "MatMul", operands, op_attrs, 1)

  self._run(func, num_iters)
def testCopyBetweenDevices(self):
  """Copies a tensor CPU -> GPU -> GPU -> CPU, then tries a bad GPU index."""
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  x = tensor.Tensor([[1., 2.], [3., 4.]])
  # Round trip through both device kinds, including a GPU-to-GPU copy.
  x = x.as_cpu_tensor().as_gpu_tensor().as_gpu_tensor().as_cpu_tensor()

  # Invalid device
  with self.assertRaises(errors.InvalidArgumentError):
    x.as_gpu_tensor(context.context().num_gpus() + 1)
def as_default(self):
  """Returns a context manager that enables summary writing.

  While suspended at the `yield`, this writer is installed as the context's
  `summary_writer`; the previous writer is reinstated in a `finally` block.

  Yields:
    This writer object.

  Raises:
    RuntimeError: If this is a v2 writer, execution is eager, and the writer
      has already been closed.
  """
  already_closed = self._v2 and context.executing_eagerly() and self._closed
  if already_closed:
    raise RuntimeError("SummaryWriter is already closed")

  previous_writer = context.context().summary_writer
  try:
    context.context().summary_writer = self
    yield self
    # Flushes the summary writer in eager mode or in graph functions, but
    # not in legacy graph mode (you're on your own there).
    self.flush()
  finally:
    context.context().summary_writer = previous_writer
def testCopyBetweenDevices(self):
  """Copies a tensor CPU -> GPU -> GPU -> CPU, then tries a bad GPU index."""
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  x = constant_op.constant([[1., 2.], [3., 4.]])
  # Round trip through both device kinds, including a GPU-to-GPU copy.
  x = x.as_cpu_tensor().as_gpu_tensor().as_gpu_tensor().as_cpu_tensor()

  # Invalid device
  with self.assertRaises(RuntimeError):
    x.as_gpu_tensor(context.context().num_gpus() + 1)
def __init__(self, dist, coord, device, variable_creator_fn, fn, *args,
             **kwargs):
  """Records everything this tower thread needs to run `fn` later.

  Nothing is executed here; the arguments are stored on the instance together
  with a snapshot of the calling thread's context state (eager mode, device
  placement policy, default graph, variable creator stack, variable scope and
  name scope).

  Args:
    dist: Stored as `self.distribution`; its `worker_devices` is used to
      derive `self.tower_id` from `device`.
    coord: Stored as `self.coord`.
    device: Stored as `self.device`; must be present in
      `dist.worker_devices` (otherwise `list.index` raises `ValueError`,
      assuming `worker_devices` is a list -- TODO confirm).
    variable_creator_fn: Stored as `self.variable_creator_fn`.
    fn: Stored as `self.main_fn`.
    *args: Stored as `self.main_args`.
    **kwargs: Stored as `self.main_kwargs`.
  """
  super(MirroredStrategy._MirroredTowerThread, self).__init__()  # pylint: disable=protected-access
  self.coord = coord
  self.distribution = dist
  self.device = device
  self.tower_id = dist.worker_devices.index(device)
  self.variable_creator_fn = variable_creator_fn
  # State needed to run and return the results of `fn`.
  self.main_fn = fn
  self.main_args = args
  self.main_kwargs = kwargs
  self.main_result = None
  self.done = False
  # State needed to run the next merge_call() (if any) requested via
  # TowerContext.
  self.merge_fn = None
  self.merge_args = None
  self.merge_kwargs = None
  self.merge_result = None
  self.captured_name_scope = None
  # We use a thread.Event for the main thread to signal when this
  # thread should start running (`should_run`), and another for
  # this thread to transfer control back to the main thread
  # (`has_paused`, either when it gets to a
  # `get_tower_context().merge_call` or when `fn` returns). In
  # either case the event starts cleared, is signaled by calling
  # set(). The receiving thread waits for the signal by calling
  # wait() and then immediately clearing the event using clear().
  self.should_run = threading.Event()
  self.has_paused = threading.Event()
  # These fields have to do with inheriting various contexts from the
  # parent thread:
  # pylint: disable=protected-access
  self.context_mode = context.context()._eager_context.mode
  # The placement policy is read from the context handle, so make sure the
  # handle has been initialized first.
  if not context.context()._context_handle:
    context.context()._initialize_handle_and_devices()
  self.context_device_policy = (
      pywrap_tensorflow.TFE_ContextGetDevicePlacementPolicy(
          context.context()._context_handle))
  self.graph = ops.get_default_graph()
  # Slice copy, so later changes to the graph's stack list do not affect this
  # snapshot.
  self._variable_creator_stack = self.graph._variable_creator_stack[:]
  self._captured_var_scope = variable_scope.get_variable_scope()
  # Adding a "/" at end lets us re-enter this scope later.
  self._name_scope = self.graph.get_name_scope()
  if self._name_scope:
    self._name_scope += "/"
  # Towers other than the first get a "tower_<id>/" suffix on the scope.
  if self.tower_id > 0:
    # Normalize a falsy scope to "" so the string append below works.
    if not self._name_scope:
      self._name_scope = ""
    self._name_scope += "tower_%d/" % self.tower_id
def testDevicePolicy(self):
  """Walks through every device policy and checks cross-device addition."""
  self.assertEqual(context.DEVICE_PLACEMENT_SILENT,
                   context.context().device_policy)

  # If no op has been executed we should be able to set the device policy as
  # well as any init-time configs.
  config.set_intra_op_parallelism_threads(1)
  config.set_device_policy('silent')
  config.set_intra_op_parallelism_threads(2)

  # Execute a dummy op to ensure that the context has been initialized
  constant_op.constant(1)

  def copy_tensor(dtype=dtypes.int32):
    cpu_tensor = constant_op.constant(1, dtype=dtype)
    gpu_tensor = cpu_tensor.gpu()
    self.assertAllEqual(cpu_tensor + gpu_tensor, 2.0)

  def set_and_verify(policy_name, expected_policy):
    # The string getter and the context attribute must both reflect the
    # policy that was just set.
    config.set_device_policy(policy_name)
    self.assertEqual(config.get_device_policy(), policy_name)
    self.assertEqual(expected_policy, context.context().device_policy)

  def expect_conflict():
    return self.assertRaisesRegexp(errors.InvalidArgumentError,
                                   'Tensors on conflicting devices')

  set_and_verify('silent', context.DEVICE_PLACEMENT_SILENT)
  copy_tensor()

  # Under this policy the float32 addition must fail while the default
  # (int32) one still succeeds.
  set_and_verify('silent_for_int32', context.DEVICE_PLACEMENT_SILENT_FOR_INT32)
  with expect_conflict():
    copy_tensor(dtypes.float32)
  copy_tensor()

  set_and_verify('warn', context.DEVICE_PLACEMENT_WARN)
  copy_tensor()

  set_and_verify('explicit', context.DEVICE_PLACEMENT_EXPLICIT)
  with expect_conflict():
    copy_tensor()

  # Passing None resets the policy; the getter then reports 'silent'.
  config.set_device_policy(None)
  self.assertEqual(config.get_device_policy(), 'silent')
def trace_off():
  """Stops the current trace and discards any collected information.

  Clears the module-level `_current_trace_context` under its lock, disables
  run metadata on the eager context, and stops the profiler. A profiler that
  is not running is tolerated.
  """
  global _current_trace_context
  # Reset the shared trace state while holding its lock.
  with _current_trace_context_lock:
    _current_trace_context = None

  # Disabling run_metadata disables graph collection as well.
  context.context().disable_run_metadata()

  # profiler only has start and stop. One needs to stop in order to export
  # and stopping when it is not running will raise an error.
  try:
    _profiler.stop()
  except _profiler.ProfilerNotRunningError:
    # Nothing was being profiled; there is nothing to discard.
    pass
def as_default(self):
  """Enables summary writing within a `with` block.

  When `self._resource` is None this yields `self` and does nothing else.
  Otherwise this writer's resource is installed as the context's
  `summary_writer_resource` while suspended at the `yield`; the previous
  resource is reinstated in a `finally` block.

  Yields:
    This writer object.
  """
  if self._resource is None:
    yield self
    return

  previous_resource = context.context().summary_writer_resource
  try:
    context.context().summary_writer_resource = self._resource
    yield self
    # Flushes the summary writer in eager mode or in graph functions, but
    # not in legacy graph mode (you're on your own there).
    with ops.device("cpu:0"):
      gen_summary_ops.flush_summary_writer(self._resource)
  finally:
    context.context().summary_writer_resource = previous_resource
def _zeros(shape, dtype):
  """Returns a cached zero-filled value for the given shape and dtype.

  Values are cached per (shape, dtype, current device name, eager context
  mode) and created with `_fast_fill` on a cache miss.

  Args:
    shape: Shape of the zero value; also part of the cache key.
    dtype: Dtype of the zero value; also part of the cache key.

  Returns:
    The cached or newly created value, or None when `dtype` is
    `dtypes.variant`.
  """
  device = context.context().device_name
  if dtype == dtypes.variant:
    # TODO(apassos): need to save enough information about variant tensors to
    # do a zeros
    return None

  # pylint: disable=protected-access
  mode = context.context()._eager_context.mode
  # pylint: enable=protected-access
  key = (shape, dtype, device, mode)

  zeros = _zeros_cache.get(key)
  if zeros is not None:
    return zeros

  zeros = _fast_fill(0, shape, dtype)
  _zeros_cache.put(key, zeros)
  return zeros
def _simple_reduce(per_device_value, reduce_to_device, accumulation_fn,
                   aggregation):
  """Aggregates the values of `per_device_value` on `reduce_to_device`.

  Args:
    per_device_value: Object whose `_index` mapping holds the per-device
      values. `value_lib.MapOutput` entries are first summed with
      `math_ops.add_n`; empty ones are skipped.
    reduce_to_device: Device scope under which the aggregation is built.
    accumulation_fn: Function used to aggregate across devices.
    aggregation: `VariableAggregation.SUM` or `VariableAggregation.MEAN`.

  Returns:
    The aggregated value, divided by the number of contributing values when
    `aggregation` is MEAN.

  Raises:
    ValueError: If no values were collected, or if `aggregation` is neither
      SUM nor MEAN.
  """
  # pylint: disable=g-missing-docstring
  collected = []
  count = 0
  for value in per_device_value._index.values():  # pylint: disable=protected-access
    if not isinstance(value, value_lib.MapOutput):
      count += 1
      collected.append(value)
      continue

    parts = value.get()
    if not parts:
      continue
    count += len(parts)
    # Sum within each device before aggregating across devices.
    # TODO(yuefengz): Check whether it helps to use accumulation_fn here.
    collected.append(
        cross_tower_utils.aggregate_tensors_or_indexed_slices(
            parts, math_ops.add_n))

  if not collected:
    raise ValueError("`per_device_value` must be non-empty")

  silent = context.DEVICE_PLACEMENT_SILENT
  with ops.device(reduce_to_device), context.context().device_policy(silent):
    reduced = cross_tower_utils.aggregate_tensors_or_indexed_slices(
        collected, accumulation_fn)
    if aggregation == vs.VariableAggregation.MEAN:
      reduced = cross_tower_utils.divide_by_n_tensors_or_indexed_slices(
          reduced, count)
    elif aggregation != vs.VariableAggregation.SUM:
      raise ValueError("`aggregation` must be VariableAggregation.SUM "
                       "or VariableAggregation.MEAN.")
  return reduced
def _real_mirrored_creator(devices, *args, **kwargs):  # pylint: disable=g-missing-docstring
  # Creates one variable per entry of `devices` by calling `next_creator`
  # (a name taken from the enclosing scope, not defined in this function)
  # and returns a dict mapping each device to its variable.
  #
  # Args:
  #   devices: Sequence of devices; the variable created for `devices[0]` is
  #     the source for the name and initial value of all later ones.
  #   *args, **kwargs: Forwarded to `next_creator`. For every device after
  #     the first, `kwargs["name"]` and `kwargs["initial_value"]` are
  #     overwritten in place.
  #
  # Returns:
  #   Dict mapping device -> created variable.
  index = {}
  for i, d in enumerate(devices):
    with ops.device(d):
      if i > 0:
        # Give replicas meaningful distinct names:
        var0name = index[devices[0]].name.split(":")[0]
        # We append a / to variable names created on towers with id > 0 to
        # ensure that we ignore the name scope and instead use the given
        # name as the absolute name of the variable.
        kwargs["name"] = "%s/replica_%d/" % (var0name, i)
        # Initialize replicas with the same value:
        if context.executing_eagerly():
          kwargs["initial_value"] = array_ops.identity(
              index[devices[0]].value())
        else:
          # `device=d` binds the current loop value at definition time, so
          # the callable keeps this iteration's device even if it is invoked
          # after the loop has advanced.
          def initial_value_fn(device=d):
            with ops.device(device):
              return array_ops.identity(index[devices[0]].initial_value)
          kwargs["initial_value"] = initial_value_fn
      with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
        v = next_creator(*args, **kwargs)
      # The creator must return a plain variable, not a distributed one.
      assert not isinstance(v, values.DistributedVariable)
      index[d] = v
  return index
def testMatMulGPU(self):
  """Multiplies two 1x1 GPU tensors and checks the product."""
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  # Both operands are created as tensors and then copied to the GPU.
  three, five = [
      tensor.Tensor([[value]]).as_gpu_tensor() for value in (3., 5.)
  ]
  product = math_ops.matmul(three, five)
  self.assertEqual([[15.0]], product.numpy())
def testOutputOnHostMemory(self):
  """Takes the shape of a GPU tensor and reads it back as a numpy value."""
  if not context.context().num_gpus():
    self.skipTest('No GPUs found')

  # The Shape op kernel on GPU places the output in host memory.
  gpu_value = tensor.Tensor([1.]).as_gpu_tensor()
  shape_of_value = array_ops.shape(gpu_value)
  self.assertEquals([1], shape_of_value.numpy())
def testExecutionMode(self):
  """Switches between synchronous and asynchronous execution."""

  def verify(expect_sync):
    # The boolean getter and the context's execution mode must agree.
    check = self.assertTrue if expect_sync else self.assertFalse
    check(config.get_synchronous_execution())
    expected_mode = context.SYNC if expect_sync else context.ASYNC
    self.assertEqual(expected_mode, context.context().execution_mode)

  verify(True)

  # If no op has been executed we should be able to set the execution mode as
  # well as any init-time configs.
  config.set_intra_op_parallelism_threads(1)
  config.set_synchronous_execution(False)
  config.set_intra_op_parallelism_threads(2)

  config.set_synchronous_execution(True)
  verify(True)

  config.set_synchronous_execution(False)
  verify(False)
def debug_nan_count(input, device_name="", tensor_name="", debug_urls=[], gated_grpc=False, name=None):
  r"""Debug NaN Value Counter Op.

  Counts number of NaNs in the input tensor, for debugging.

  Args:
    input: A `Tensor`. Input tensor, non-Reference type.
    device_name: An optional `string`. Defaults to `""`.
    tensor_name: An optional `string`. Defaults to `""`.
      Name of the input tensor.
    debug_urls: An optional list of `strings`. Defaults to `[]`.
      List of URLs to debug targets, e.g.,
        file:///foo/tfdbg_dump, grpc://localhost:11011.
    gated_grpc: An optional `bool`. Defaults to `False`.
       Whether this op will be gated. If any of the debug_urls of this debug
       node is of the grpc:// scheme, when the value of this attribute is set
       to True, the data will not actually be sent via the grpc stream unless
       this debug op has been enabled at the debug_url. If all of the
       debug_urls of this debug node are of the grpc:// scheme and the debug
       op is enabled at none of them, the output will be an empty Tensor.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `int64`.
  """
  # NOTE: the `debug_urls=[]` default is a shared list, but this Python body
  # only rebinds the name and never mutates the list.
  _ctx = _context._context or _context.context()
  tld = _ctx._thread_local_data
  if tld.is_eager:
    # Eager mode, attempt 1: the fast-path execute call.
    try:
      _result = pywrap_tfe.TFE_Py_FastPathExecute(
        _ctx._context_handle, tld.device_name, "DebugNanCount", name,
        tld.op_callbacks, input, "device_name", device_name, "tensor_name",
        tensor_name, "debug_urls", debug_urls, "gated_grpc", gated_grpc)
      return _result
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    # Eager mode, attempt 2: the eager fallback function. A symbolic
    # exception here falls through to graph construction below.
    try:
      return debug_nan_count_eager_fallback(
          input, device_name=device_name, tensor_name=tensor_name,
          debug_urls=debug_urls, gated_grpc=gated_grpc, name=name, ctx=_ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.
  # Add nodes to the TensorFlow graph.
  # Replace None attributes with their defaults and coerce each to its type.
  if device_name is None:
    device_name = ""
  device_name = _execute.make_str(device_name, "device_name")
  if tensor_name is None:
    tensor_name = ""
  tensor_name = _execute.make_str(tensor_name, "tensor_name")
  if debug_urls is None:
    debug_urls = []
  if not isinstance(debug_urls, (list, tuple)):
    raise TypeError(
        "Expected list for 'debug_urls' argument to "
        "'debug_nan_count' Op, not %r." % debug_urls)
  debug_urls = [_execute.make_str(_s, "debug_urls") for _s in debug_urls]
  if gated_grpc is None:
    gated_grpc = False
  gated_grpc = _execute.make_bool(gated_grpc, "gated_grpc")
  _, _, _op, _outputs = _op_def_library._apply_op_helper(
        "DebugNanCount", input=input, device_name=device_name,
                         tensor_name=tensor_name, debug_urls=debug_urls,
                         gated_grpc=gated_grpc, name=name)
  _result = _outputs[:]
  # Only build the attribute tuple and record the op when gradients must be
  # recorded.
  if _execute.must_record_gradient():
    _attrs = ("T", _op._get_attr_type("T"), "device_name",
              _op.get_attr("device_name"), "tensor_name",
              _op.get_attr("tensor_name"), "debug_urls",
              _op.get_attr("debug_urls"), "gated_grpc",
              _op._get_attr_bool("gated_grpc"))
    _inputs_flat = _op.inputs
    _execute.record_gradient(
        "DebugNanCount", _inputs_flat, _attrs, _result)
  # The op has a single output; unpack it from the one-element list.
  _result, = _result
  return _result
def load_and_remap_matrix(ckpt_path, old_tensor_name, row_remapping, col_remapping, initializing_values, num_rows, num_cols, max_rows_in_memory=-1, name=None):
  r"""Loads a 2-D (matrix) `Tensor` with name `old_tensor_name` from the checkpoint

  at `ckpt_path` and potentially reorders its rows and columns using the
  specified remappings.

  Most users should use one of the wrapper initializers (such as
  `tf.contrib.framework.load_and_remap_matrix_initializer`) instead of this
  function directly.

  The remappings are 1-D tensors with the following properties:

  * `row_remapping` must have exactly `num_rows` entries. Row `i` of the output
    matrix will be initialized from the row corresponding to index
    `row_remapping[i]` in the old `Tensor` from the checkpoint.
  * `col_remapping` must have either 0 entries (indicating that no column
    reordering is needed) or `num_cols` entries. If specified, column `j` of the
    output matrix will be initialized from the column corresponding to index
    `col_remapping[j]` in the old `Tensor` from the checkpoint.
  * A value of -1 in either of the remappings signifies a "missing" entry. In that
    case, values from the `initializing_values` tensor will be used to fill that
    missing row or column. If `row_remapping` has `r` missing entries and
    `col_remapping` has `c` missing entries, then the following condition must be
    true:

  `(r * num_cols) + (c * num_rows) - (r * c) == len(initializing_values)`

  The remapping tensors can be generated using the GenerateVocabRemapping op.

  As an example, with row_remapping = [1, 0, -1], col_remapping = [0, 2, -1],
  initializing_values = [0.5, -0.5, 0.25, -0.25, 42], and w(i, j) representing
  the value from row i, column j of the old tensor in the checkpoint, the output
  matrix will look like the following:

  [[w(1, 0),  w(1, 2),  0.5],
   [w(0, 0),  w(0, 2), -0.5],
   [0.25,    -0.25,      42]]

  Args:
    ckpt_path: A `Tensor` of type `string`.
      Path to the TensorFlow checkpoint (version 2, `TensorBundle`) from
      which the old matrix `Tensor` will be loaded.
    old_tensor_name: A `Tensor` of type `string`.
      Name of the 2-D `Tensor` to load from checkpoint.
    row_remapping: A `Tensor` of type `int64`.
      An int `Tensor` of row remappings (generally created by
      `generate_vocab_remapping`).  Even if no row remapping is needed, this must
      still be an index-valued Tensor (e.g. [0, 1, 2, ...]), or a shifted
      index-valued `Tensor` (e.g. [8, 9, 10, ...], for partitioned `Variables`).
    col_remapping: A `Tensor` of type `int64`.
      An int `Tensor` of column remappings (generally created by
      `generate_vocab_remapping`).  May be a size-0 `Tensor` if only row remapping
      is to be done (e.g. column ordering is the same).
    initializing_values: A `Tensor` of type `float32`.
      A float `Tensor` containing  values to fill in for cells
      in the output matrix that are not loaded from the checkpoint. Length must be
      exactly the same as the number of missing / new cells.
    num_rows: An `int` that is `>= 0`.
      Number of rows (length of the 1st dimension) in the output matrix.
    num_cols: An `int` that is `>= 1`.
      Number of columns (length of the 2nd dimension) in the output matrix.
    max_rows_in_memory: An optional `int`. Defaults to `-1`.
      The maximum number of rows to load from the checkpoint at
      once. If less than or equal to 0, the entire matrix will be loaded into
      memory. Setting this arg trades increased disk reads for lower memory usage.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float32`.
  """
  _ctx = _context._context or _context.context()
  if _ctx is not None and _ctx._thread_local_data.is_eager:
    # Eager mode: try the fast-path execute call first.
    try:
      _result = _pywrap_tensorflow.TFE_Py_FastPathExecute(
        _ctx._context_handle, _ctx._thread_local_data.device_name,
        "LoadAndRemapMatrix", name, _ctx.post_execution_callbacks, ckpt_path,
        old_tensor_name, row_remapping, col_remapping, initializing_values,
        "num_rows", num_rows, "num_cols", num_cols, "max_rows_in_memory",
        max_rows_in_memory)
      return _result
    except _core._FallbackException:
      # The fast path asked for a fallback: use the eager fallback function.
      # A symbolic exception from it falls through to graph construction.
      try:
        return load_and_remap_matrix_eager_fallback(
            ckpt_path, old_tensor_name, row_remapping, col_remapping,
            initializing_values, num_rows=num_rows, num_cols=num_cols,
            max_rows_in_memory=max_rows_in_memory, name=name, ctx=_ctx)
      except _core._SymbolicException:
        pass  # Add nodes to the TensorFlow graph.
    except _core._NotOkStatusException as e:
      # Append the op name (when given) to the message before converting the
      # status into an exception.
      if name is not None:
        message = e.message + " name: " + name
      else:
        message = e.message
      _six.raise_from(_core._status_to_exception(e.code, message), None)
  # Add nodes to the TensorFlow graph.
  # Coerce the integer attributes; a None `max_rows_in_memory` becomes -1.
  num_rows = _execute.make_int(num_rows, "num_rows")
  num_cols = _execute.make_int(num_cols, "num_cols")
  if max_rows_in_memory is None:
    max_rows_in_memory = -1
  max_rows_in_memory = _execute.make_int(max_rows_in_memory, "max_rows_in_memory")
  _, _, _op = _op_def_lib._apply_op_helper(
        "LoadAndRemapMatrix", ckpt_path=ckpt_path,
                              old_tensor_name=old_tensor_name,
                              row_remapping=row_remapping,
                              col_remapping=col_remapping,
                              initializing_values=initializing_values,
                              num_rows=num_rows, num_cols=num_cols,
                              max_rows_in_memory=max_rows_in_memory,
                              name=name)
  _result = _op.outputs[:]
  _inputs_flat = _op.inputs
  _attrs = ("num_rows", _op.get_attr("num_rows"), "num_cols",
            _op.get_attr("num_cols"), "max_rows_in_memory",
            _op.get_attr("max_rows_in_memory"))
  _execute.record_gradient(
      "LoadAndRemapMatrix", _inputs_flat, _attrs, _result, name)
  # The op has a single output; unpack it from the one-element list.
  _result, = _result
  return _result
def generate_vocab_remapping(new_vocab_file, old_vocab_file, new_vocab_offset,
                             num_new_vocab, old_vocab_size=-1, name=None):
  r"""Computes a remapping from a new vocabulary file onto an old one.

  Returns a Tensor of length `num_new_vocab` in which `remapping[i]` holds the
  row number in the old vocabulary of the entity found on row `i` of the new
  vocabulary (starting at line `new_vocab_offset` and covering up to
  `num_new_vocab` entities), or `-1` when that entity is not present in the
  old vocabulary. Unless `old_vocab_size` is left at its default of -1, only
  the first `old_vocab_size` entries of the old vocabulary are considered.

  `new_vocab_offset` enables use in the partitioned variable case, and should
  generally be set through examining partitioning info. Both files must be
  text files with a single vocabulary entity on each line.

  For example, with `new_vocab_file` a text file containing each of the
  elements `[f0, f1, f2, f3]` on its own line, `old_vocab_file = [f1, f0, f3]`,
  `num_new_vocab = 3` and `new_vocab_offset = 1`, the returned remapping is
  `[0, -1, 2]`.

  The op also returns a count of how many entries of the new vocabulary were
  present in the old vocabulary, which is used to calculate the number of
  values to initialize in a weight matrix remapping.

  This functionality can be used to remap both row vocabularies (typically,
  features) and column vocabularies (typically, classes) from TensorFlow
  checkpoints. Note that the partitioning logic relies on contiguous
  vocabularies corresponding to div-partitioned variables. Moreover, the
  underlying remapping uses an IndexTable (as opposed to an inexact
  CuckooTable), so client code should use the corresponding
  index_table_from_file() as the FeatureColumn framework does (as opposed to
  tf.feature_to_id(), which uses a CuckooTable).

  Args:
    new_vocab_file: A `Tensor` of type `string`. Path to the new vocab file.
    old_vocab_file: A `Tensor` of type `string`. Path to the old vocab file.
    new_vocab_offset: An `int` that is `>= 0`.
      How many entries into the new vocab file to start reading.
    num_new_vocab: An `int` that is `>= 0`.
      Number of entries in the new vocab file to remap.
    old_vocab_size: An optional `int` that is `>= -1`. Defaults to `-1`.
      Number of entries in the old vocab file to consider. If -1,
      use the entire old vocabulary.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (remapping, num_present).

    remapping: A `Tensor` of type `int64`.
    num_present: A `Tensor` of type `int32`.
  """
  _ctx = _context._context or _context.context()
  if _ctx is not None and _ctx._thread_local_data.is_eager:
    try:
      _fast_outputs = _pywrap_tensorflow.TFE_Py_FastPathExecute(
          _ctx._context_handle, _ctx._thread_local_data.device_name,
          "GenerateVocabRemapping", name, _ctx.post_execution_callbacks,
          new_vocab_file, old_vocab_file,
          "new_vocab_offset", new_vocab_offset,
          "num_new_vocab", num_new_vocab,
          "old_vocab_size", old_vocab_size)
      return _GenerateVocabRemappingOutput._make(_fast_outputs)
    except _core._FallbackException:
      try:
        return generate_vocab_remapping_eager_fallback(
            new_vocab_file, old_vocab_file,
            new_vocab_offset=new_vocab_offset, num_new_vocab=num_new_vocab,
            old_vocab_size=old_vocab_size, name=name, ctx=_ctx)
      except _core._SymbolicException:
        pass  # Add nodes to the TensorFlow graph.
    except _core._NotOkStatusException as _status_err:
      # Append the op name (when given) so the failure can be located.
      _detail = _status_err.message
      if name is not None:
        _detail = _detail + " name: " + name
      _six.raise_from(
          _core._status_to_exception(_status_err.code, _detail), None)

  # Add nodes to the TensorFlow graph.
  new_vocab_offset = _execute.make_int(new_vocab_offset, "new_vocab_offset")
  num_new_vocab = _execute.make_int(num_new_vocab, "num_new_vocab")
  old_vocab_size = _execute.make_int(
      -1 if old_vocab_size is None else old_vocab_size, "old_vocab_size")
  _, _, _node = _op_def_lib._apply_op_helper(
      "GenerateVocabRemapping",
      new_vocab_file=new_vocab_file,
      old_vocab_file=old_vocab_file,
      new_vocab_offset=new_vocab_offset,
      num_new_vocab=num_new_vocab,
      old_vocab_size=old_vocab_size,
      name=name)
  _outputs = _node.outputs[:]
  _flat_inputs = _node.inputs
  _attr_pairs = (
      "new_vocab_offset", _node.get_attr("new_vocab_offset"),
      "num_new_vocab", _node.get_attr("num_new_vocab"),
      "old_vocab_size", _node.get_attr("old_vocab_size"))
  _execute.record_gradient(
      "GenerateVocabRemapping", _flat_inputs, _attr_pairs, _outputs, name)
  return _GenerateVocabRemappingOutput._make(_outputs)
def sequence_file_dataset(filenames, output_types, name=None):
  r"""Runs or builds the `SequenceFileDataset` op and returns its output.

  With an eager context the op goes through the fast-path executor, falling
  back to `sequence_file_dataset_eager_fallback`; otherwise (or when the
  fallback reports symbolic inputs) a node is added to the graph. If the eager
  fallback or graph construction raises `TypeError`/`ValueError`, the call is
  first offered to the registered dispatchers and the error is re-raised only
  when none of them supports it.

  TODO: document the semantics of the op itself.

  Args:
    filenames: A `Tensor` of type `string`.
    output_types: A list of `tf.DTypes` that has length `>= 1`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `variant`.
  """

  def _dispatch_with(types):
    # Offer this call to the registered op dispatchers.
    return _dispatch.dispatch(
        sequence_file_dataset, filenames=filenames, output_types=types,
        name=name)

  _ctx = _context._context or _context.context()
  if _ctx is not None and _ctx._thread_local_data.is_eager:
    try:
      return _pywrap_tensorflow.TFE_Py_FastPathExecute(
          _ctx._context_handle, _ctx._thread_local_data.device_name,
          "SequenceFileDataset", name, _ctx._post_execution_callbacks,
          filenames, "output_types", output_types)
    except _core._FallbackException:
      try:
        return sequence_file_dataset_eager_fallback(
            filenames, output_types=output_types, name=name, ctx=_ctx)
      except _core._SymbolicException:
        pass  # Add nodes to the TensorFlow graph.
      except (TypeError, ValueError):
        _handled = _dispatch_with(output_types)
        if _handled is not _dispatch.OpDispatcher.NOT_SUPPORTED:
          return _handled
        raise
    except _core._NotOkStatusException as _status_err:
      # Append the op name (when given) so the failure can be located.
      _detail = _status_err.message
      if name is not None:
        _detail = _detail + " name: " + name
      _six.raise_from(
          _core._status_to_exception(_status_err.code, _detail), None)

  # Add nodes to the TensorFlow graph.
  if not isinstance(output_types, (list, tuple)):
    raise TypeError(
        "Expected list for 'output_types' argument to "
        "'sequence_file_dataset' Op, not %r." % output_types)
  output_types = [
      _execute.make_type(_dtype, "output_types") for _dtype in output_types
  ]
  try:
    _, _, _node = _op_def_lib._apply_op_helper(
        "SequenceFileDataset", filenames=filenames,
        output_types=output_types, name=name)
  except (TypeError, ValueError):
    _handled = _dispatch_with(output_types)
    if _handled is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return _handled
    raise
  _outputs = _node.outputs[:]
  _flat_inputs = _node.inputs
  _attr_pairs = ("output_types", _node.get_attr("output_types"))
  _execute.record_gradient(
      "SequenceFileDataset", _flat_inputs, _attr_pairs, _outputs, name)
  # The op has exactly one output; unpack it from the list.
  _only_output, = _outputs
  return _only_output
def big_query_reader(project_id, dataset_id, table_id, columns,
                     timestamp_millis, container="", shared_name="",
                     test_end_point="", name=None):
  r"""A Reader that outputs rows from a BigQuery table as tensorflow Examples.

  All attributes are validated and converted first; the op is then added to
  the graph. Eager execution is rejected with a `RuntimeError` because the
  op's output handle is a ref.

  Args:
    project_id: A `string`. GCP project ID.
    dataset_id: A `string`. BigQuery Dataset ID.
    table_id: A `string`. Table to read.
    columns: A list of `strings`.
      List of columns to read. Leave empty to read all columns.
    timestamp_millis: An `int`.
      Table snapshot timestamp in millis since epoch. Relative
      (negative or zero) snapshot times are not allowed. For more details, see
      'Table Decorators' in BigQuery docs.
    container: An optional `string`. Defaults to `""`.
      If non-empty, this reader is placed in the given container.
      Otherwise, a default container is used.
    shared_name: An optional `string`. Defaults to `""`.
      If non-empty, this reader is named in the given bucket
      with this shared_name. Otherwise, the node name is used instead.
    test_end_point: An optional `string`. Defaults to `""`.
      Do not use. For testing purposes only.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type mutable `string`. The handle to reference the Reader.

  Raises:
    TypeError: If `columns` is not a list or tuple.
    RuntimeError: If called while not in graph mode.
  """
  project_id = _execute.make_str(project_id, "project_id")
  dataset_id = _execute.make_str(dataset_id, "dataset_id")
  table_id = _execute.make_str(table_id, "table_id")
  if not isinstance(columns, (list, tuple)):
    raise TypeError(
        "Expected list for 'columns' argument to "
        "'big_query_reader' Op, not %r." % columns)
  columns = [_execute.make_str(_column, "columns") for _column in columns]
  timestamp_millis = _execute.make_int(timestamp_millis, "timestamp_millis")
  # `None` for an optional string attribute means "use the default".
  container = _execute.make_str(
      "" if container is None else container, "container")
  shared_name = _execute.make_str(
      "" if shared_name is None else shared_name, "shared_name")
  test_end_point = _execute.make_str(
      "" if test_end_point is None else test_end_point, "test_end_point")

  _ctx = _context.context()
  if not _ctx.in_graph_mode():
    raise RuntimeError(
        "big_query_reader op does not support eager execution. Arg 'reader_handle'' is a ref.")

  _, _, _node = _op_def_lib._apply_op_helper(
      "BigQueryReader",
      project_id=project_id,
      dataset_id=dataset_id,
      table_id=table_id,
      columns=columns,
      timestamp_millis=timestamp_millis,
      container=container,
      shared_name=shared_name,
      test_end_point=test_end_point,
      name=name)
  _outputs = _node.outputs[:]
  _flat_inputs = _node.inputs
  _attr_pairs = (
      "container", _node.get_attr("container"),
      "shared_name", _node.get_attr("shared_name"),
      "project_id", _node.get_attr("project_id"),
      "dataset_id", _node.get_attr("dataset_id"),
      "table_id", _node.get_attr("table_id"),
      "columns", _node.get_attr("columns"),
      "timestamp_millis", _node.get_attr("timestamp_millis"),
      "test_end_point", _node.get_attr("test_end_point"))
  _execute.record_gradient(
      "BigQueryReader", _flat_inputs, _attr_pairs, _outputs, name)
  # The op has exactly one output; unpack it from the list.
  _reader_handle, = _outputs
  return _reader_handle
def ragged_tensor_to_sparse(rt_nested_splits, rt_dense_values, name=None):
  r"""Converts a `RaggedTensor` into a `SparseTensor` with the same values.

  input=ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits)
  output=SparseTensor(indices=sparse_indices, values=sparse_values,
                      dense_shape=sparse_dense_shape)

  With an eager context the op goes through the fast-path executor, falling
  back to `ragged_tensor_to_sparse_eager_fallback`; otherwise (or when the
  fallback reports symbolic inputs) a node is added to the graph.

  Args:
    rt_nested_splits: A list of at least 1 `Tensor` objects with the same type
      in: `int32`, `int64`. The `row_splits` for the `RaggedTensor`.
    rt_dense_values: A `Tensor`. The `flat_values` for the `RaggedTensor`.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (sparse_indices, sparse_values,
    sparse_dense_shape).

    sparse_indices: A `Tensor` of type `int64`.
    sparse_values: A `Tensor`. Has the same type as `rt_dense_values`.
    sparse_dense_shape: A `Tensor` of type `int64`.

  Raises:
    TypeError: On the graph path, if `rt_nested_splits` is not a list or
      tuple.
  """
  _ctx = _context._context or _context.context()
  if _ctx is not None and _ctx._thread_local_data.is_eager:
    try:
      _result = _pywrap_tensorflow.TFE_Py_FastPathExecute(
          _ctx._context_handle, _ctx._thread_local_data.device_name,
          "RaggedTensorToSparse", name, _ctx._post_execution_callbacks,
          rt_nested_splits, rt_dense_values)
      _result = _RaggedTensorToSparseOutput._make(_result)
      return _result
    except _core._FallbackException:
      try:
        return ragged_tensor_to_sparse_eager_fallback(
            rt_nested_splits, rt_dense_values, name=name, ctx=_ctx)
      except _core._SymbolicException:
        pass  # Add nodes to the TensorFlow graph.
    except _core._NotOkStatusException as e:
      # Append the op name (when given) so the failure can be located.
      if name is not None:
        message = e.message + " name: " + name
      else:
        message = e.message
      _six.raise_from(_core._status_to_exception(e.code, message), None)

  # Add nodes to the TensorFlow graph.
  if not isinstance(rt_nested_splits, (list, tuple)):
    raise TypeError(
        "Expected list for 'rt_nested_splits' argument to "
        "'ragged_tensor_to_sparse' Op, not %r." % rt_nested_splits)
  # The RAGGED_RANK attr is read back from the created op below, so no local
  # copy of len(rt_nested_splits) is kept here.
  _, _, _op = _op_def_lib._apply_op_helper(
      "RaggedTensorToSparse", rt_nested_splits=rt_nested_splits,
      rt_dense_values=rt_dense_values, name=name)
  _result = _op.outputs[:]
  _inputs_flat = _op.inputs
  _attrs = ("RAGGED_RANK", _op.get_attr("RAGGED_RANK"),
            "T", _op.get_attr("T"),
            "Tsplits", _op.get_attr("Tsplits"))
  _execute.record_gradient(
      "RaggedTensorToSparse", _inputs_flat, _attrs, _result, name)
  _result = _RaggedTensorToSparseOutput._make(_result)
  return _result
def dtensor_initialize_tpu_system(enable_coordination_service=False):
  """Initialize the TPU devices.

  Sets up the multi-client cluster when the job name is not "localhost", runs
  the global TPU configuration op, merges the TPU core IDs reported by every
  client, and stores the merged core IDs, their locations, the derived
  topology and the DTensor device in module-level globals.

  Args:
    enable_coordination_service: If true, enable distributed coordination
      service to make sure that workers know the devices on each other, a
      prerequisite for data transfer through cross-worker rendezvous.

  Raises:
    RuntimeError: If running inside a tf.function.
    ValueError: If in multi-client mode and `api.jobs()` is None (the
      DTENSOR_JOBS environment variable is required in that case).
    NotFoundError: If no TPU devices found in eager mode.
    InternalError: Re-raised unchanged, after logging a hint, when
      initialization hits an internal error.
  """
  global _all_core_ids, _all_core_locations, _tpu_topology, _dtensor_device

  # An explicit check instead of `assert`: asserts are stripped under -O and
  # would raise AssertionError rather than the documented RuntimeError.
  if not context.executing_eagerly():
    raise RuntimeError(
        "dtensor_initialize_tpu_system must be called eagerly, not inside a "
        "tf.function.")

  # Reconfigure TensorFlow to use TFRT TPU runtime if requested.
  _configure_tpu_runtime()

  in_multi_client_mode = api.job_name() != "localhost"

  # Collective GRPC servers are only necessary in multi-client setup.
  # Single clients (e.g. Forge) can use local mode of collectives.
  if in_multi_client_mode:
    if api.jobs() is None:
      raise ValueError(
          "DTENSOR_JOBS environment variable is required when "
          "using multi-client to properly set up communications between servers"
      )
    multi_client_util.initialize_multi_client_cluster(
        job_name=api.job_name(),
        dtensor_jobs=api.jobs(),
        client_id=api.client_id(),
        collective_leader=api.full_job_name(task_id=0),
        enable_coordination_service=enable_coordination_service)

  # Make sure the server change is fully propagated before attempting to run
  # the core ID merging logic below.
  context.ensure_initialized()
  context.async_wait()
  context.context()._clear_caches()  # pylint: disable=protected-access

  @function.defun
  def _tpu_init_fn():
    return gen_dtensor_ops.configure_and_initialize_global_tpu()

  try:
    with ops.device("/job:" + api.full_job_name() + "/device:TPU_SYSTEM:0"):  # pylint: disable=protected-access
      my_core_ids = _tpu_init_fn()
    logging.info("TPU core IDs: %s", my_core_ids)
    context.initialize_logical_devices()

    # Configure virtual CPUs that are 1:1 mapped to TPU cores.
    context.context().set_logical_cpu_devices(
        len(api.local_devices(_TPU_DEVICE_TYPE)),
        tf_device.DeviceSpec(
            job=api.job_name(), replica=0, task=api.client_id()).to_string())

    # `my_core_ids` contains the IDs of TPU cores attached to this host.
    #
    # To generate correct and efficient XLA AllReduce group assignment, we must
    # merge these arrays from all hosts and broadcast the result back to all
    # hosts, so all hosts can use these mappings in their MLIR passes.
    #
    # This is essentially doing what WaitForDistributedTpuOp and
    # SetGlobalTPUArrayOp do, in our multi-client environment.
    task_id = api.client_id()
    num_tasks = api.num_clients()
    num_devices = api.num_global_devices(_TPU_DEVICE_TYPE)
    num_devices_per_task = int(num_devices / num_tasks)

    # Create a one-time use mesh and layout just for merging core IDs.
    mesh = layout_lib.Mesh(
        [_MESH_DIM_X],
        *_create_device_array((num_devices,), _TPU_DEVICE_TYPE,
                              api.client_id()))
    layout = layout_lib.Layout([_MESH_DIM_X, layout_lib.UNSHARDED], mesh)
    device = dtensor_device.DTensorDevice(meshes=[mesh])
    logging.info("TPU core locations: %s",
                 device.tpu_core_ids_to_locations(my_core_ids))

    # At this point, we don't know which cores are attached to other hosts.
    # The core ID mappings in the runtime haven't been set yet.
    #
    # The core ID merging AllReduce below is carefully written so it works
    # without needing correct core mappings to be set in the runtime. We will
    # use this AllReduce's result to set the core ID mappings, and all future
    # user-initiated AllReduces will use the mappings.
    #
    # The runtime is hard-coded to ignore core ID mappings on this AllReduce.
    all_core_ids = np.zeros([num_devices], dtype=np.int32)
    # This task's cores occupy a contiguous slot starting at its task offset.
    task_offset = task_id * num_devices_per_task
    for i in range(len(my_core_ids)):
      all_core_ids[task_offset + i] = my_core_ids[i]

    # Only one local device gets valid input: 8 local core IDs among
    # (num_tasks - 1) * 8 zeros. The 8 core IDs are set using task ID as offset.
    # The other 7 local devices get zero inputs. All devices on all hosts
    # participate in one AllReduce, whose result will be core IDs arranged by
    # task-device ordinals.
    all_core_ids = constant_op.constant([all_core_ids])
    zeros = array_ops.zeros_like(all_core_ids)
    all_core_ids = [all_core_ids] + [zeros] * (num_devices_per_task - 1)

    with ops.device(device.name):
      all_core_ids = device.pack(all_core_ids, layout)
      all_core_ids = math_ops.reduce_sum(all_core_ids, axis=[0])
      unpacked_all_tpu_ids = device.unpack(all_core_ids)

    all_core_ids = list(unpacked_all_tpu_ids[0].numpy())
    logging.info("All TPU core IDs: %s", all_core_ids)

    # Set the default core ID mappings in the runtime for legacy code and tests.
    #
    # Legacy code and tests create TPU meshes directly without using the
    # `create_tpu_mesh` function below. Those meshes have global device IDs
    # equal to TF task-device ordinals. The `all_core_ids` array happens to
    # arrange core IDs by TF task-device ordinals. Using this array on those
    # meshes guarantees correct although inefficient results.
    device.set_tpu_core_ids("", all_core_ids)

    # Remember enough global, immutable information to be able to build any ring
    # we want prescribed by `create_tpu_mesh` in the future.
    _all_core_ids = all_core_ids

    all_core_locations = device.tpu_core_ids_to_locations(all_core_ids)
    all_core_locations = [
        _CoreLocation(l[0], l[1], l[2], l[3]) for l in all_core_locations
    ]
    _all_core_locations = all_core_locations
    logging.info("All TPU core locations: %s", all_core_locations)

    tpu_topology = _create_tpu_topology(all_core_locations, num_tasks,
                                        num_devices_per_task)
    _tpu_topology = tpu_topology
    logging.vlog(1, "TPU Topology: %s, %s", tpu_topology.mesh_shape,
                 tpu_topology.device_coordinates)

    _dtensor_device = device

    context.async_wait()

  except errors.InvalidArgumentError as e:
    raise errors.NotFoundError(
        None, None,
        "Initialization failed, no valid TPUs found. " + str(e)) from e

  except errors.InternalError:
    logging.error(
        "Hit internal error during TPU system initialization. "
        + "It is likely hareware failure. \nPlease check the error "
        + "messages above to see whether that's the case. \nIf so, "
        + "consider to restart the job or try another machine.")
    # Bare `raise` re-raises the active exception with its traceback intact.
    raise

  # Optionally exchange periodic heartbeats between workers.
  if in_multi_client_mode and api.heartbeat_enabled():
    # NOTE(review): the previous log line claimed "every 10 minutes", which did
    # not match the value passed below. Presumably the period is in seconds -
    # confirm against `heartbeat.start`. The log now reports the actual value.
    heartbeat_period = 180
    logging.info("Starting DTensor heartbeat service with period %d",
                 heartbeat_period)
    heartbeat.start(period=heartbeat_period)

  # Clear out the eager context caches since the memory is invalid now.
  logging.info("Clearing out eager caches")
  context.context()._clear_caches()  # pylint: disable=protected-access
def sdca_optimizer(sparse_example_indices, sparse_feature_indices,
                   sparse_feature_values, dense_features, example_weights,
                   example_labels, sparse_indices, sparse_weights,
                   dense_weights, example_state_data, loss_type, l1, l2,
                   num_loss_partitions, num_inner_iterations,
                   adaptative=False, name=None):
  r"""Distributed version of Stochastic Dual Coordinate Ascent (SDCA) optimizer for

  linear models with L1 + L2 regularization. As global optimization objective is
  strongly-convex, the optimizer optimizes the dual objective at each step. The
  optimizer applies each update one example at a time. Examples are sampled
  uniformly, and the optimizer is learning rate free and enjoys linear convergence
  rate.

  [Proximal Stochastic Dual Coordinate Ascent](http://arxiv.org/pdf/1211.2717v1.pdf).<br>
  Shai Shalev-Shwartz, Tong Zhang. 2012

  $$Loss Objective = \sum f_{i} (wx_{i}) + (l2 / 2) * |w|^2 + l1 * |w|$$

  [Adding vs. Averaging in Distributed Primal-Dual Optimization](http://arxiv.org/abs/1502.03508).<br>
  Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
  Peter Richtarik, Martin Takac. 2015

  [Stochastic Dual Coordinate Ascent with Adaptive Probabilities](https://arxiv.org/abs/1502.08053).<br>
  Dominik Csiba, Zheng Qu, Peter Richtarik. 2015

  Args:
    sparse_example_indices: A list of `Tensor` objects with type `int64`.
      a list of vectors which contain example indices.
    sparse_feature_indices: A list with the same length as
      `sparse_example_indices` of `Tensor` objects with type `int64`.
      a list of vectors which contain feature indices.
    sparse_feature_values: A list of `Tensor` objects with type `float32`.
      a list of vectors which contains feature value
      associated with each feature group.
    dense_features: A list of `Tensor` objects with type `float32`.
      a list of matrices which contains the dense feature values.
    example_weights: A `Tensor` of type `float32`.
      a vector which contains the weight associated with each
      example.
    example_labels: A `Tensor` of type `float32`.
      a vector which contains the label/target associated with each
      example.
    sparse_indices: A list with the same length as `sparse_example_indices`
      of `Tensor` objects with type `int64`.
      a list of vectors where each value is the indices which has
      corresponding weights in sparse_weights. This field may be omitted for
      the dense approach.
    sparse_weights: A list with the same length as `sparse_example_indices`
      of `Tensor` objects with type `float32`.
      a list of vectors where each value is the weight associated with
      a sparse feature group.
    dense_weights: A list with the same length as `dense_features` of
      `Tensor` objects with type `float32`.
      a list of vectors where the values are the weights associated
      with a dense feature group.
    example_state_data: A `Tensor` of type `float32`.
      a list of vectors containing the example state data.
    loss_type: A `string` from: `"logistic_loss", "squared_loss",
      "hinge_loss", "smooth_hinge_loss"`.
      Type of the primal loss. Currently SdcaSolver supports logistic,
      squared and hinge losses.
    l1: A `float`. Symmetric l1 regularization strength.
    l2: A `float`. Symmetric l2 regularization strength.
    num_loss_partitions: An `int` that is `>= 1`.
      Number of partitions of the global loss function.
    num_inner_iterations: An `int` that is `>= 1`.
      Number of iterations per mini-batch.
    adaptative: An optional `bool`. Defaults to `False`.
      Whether to use Adaptive SDCA for the inner loop.
    name: A name for the operation (optional).

  Returns:
    A tuple of `Tensor` objects (out_example_state_data,
    out_delta_sparse_weights, out_delta_dense_weights).

    out_example_state_data: A `Tensor` of type `float32`.
    out_delta_sparse_weights: A list with the same length as
      `sparse_example_indices` of `Tensor` objects with type `float32`.
    out_delta_dense_weights: A list with the same length as `dense_features`
      of `Tensor` objects with type `float32`.

  Raises:
    TypeError: On the graph path, if any list-valued argument is not a list
      or tuple.
    ValueError: On the graph path, if the lengths of the sparse lists or of
      `dense_weights` do not match their reference list.
  """
  _ctx = _context.context()
  if not _ctx.executing_eagerly():
    if not isinstance(sparse_example_indices, (list, tuple)):
      raise TypeError(
          "Expected list for 'sparse_example_indices' argument to "
          "'sdca_optimizer' Op, not %r." % sparse_example_indices)
    _attr_num_sparse_features = len(sparse_example_indices)
    if not isinstance(sparse_feature_indices, (list, tuple)):
      raise TypeError(
          "Expected list for 'sparse_feature_indices' argument to "
          "'sdca_optimizer' Op, not %r." % sparse_feature_indices)
    if len(sparse_feature_indices) != _attr_num_sparse_features:
      raise ValueError(
          "List argument 'sparse_feature_indices' to 'sdca_optimizer' Op with length %d "
          "must match length %d of argument 'sparse_example_indices'." %
          (len(sparse_feature_indices), _attr_num_sparse_features))
    if not isinstance(sparse_indices, (list, tuple)):
      raise TypeError(
          "Expected list for 'sparse_indices' argument to "
          "'sdca_optimizer' Op, not %r." % sparse_indices)
    if len(sparse_indices) != _attr_num_sparse_features:
      raise ValueError(
          "List argument 'sparse_indices' to 'sdca_optimizer' Op with length %d "
          "must match length %d of argument 'sparse_example_indices'." %
          (len(sparse_indices), _attr_num_sparse_features))
    if not isinstance(sparse_weights, (list, tuple)):
      raise TypeError(
          "Expected list for 'sparse_weights' argument to "
          "'sdca_optimizer' Op, not %r." % sparse_weights)
    if len(sparse_weights) != _attr_num_sparse_features:
      raise ValueError(
          "List argument 'sparse_weights' to 'sdca_optimizer' Op with length %d "
          "must match length %d of argument 'sparse_example_indices'." %
          (len(sparse_weights), _attr_num_sparse_features))
    # Only the type of `sparse_feature_values` is checked here; its length is
    # read back from the created op ("num_sparse_features_with_values").
    if not isinstance(sparse_feature_values, (list, tuple)):
      raise TypeError(
          "Expected list for 'sparse_feature_values' argument to "
          "'sdca_optimizer' Op, not %r." % sparse_feature_values)
    if not isinstance(dense_features, (list, tuple)):
      raise TypeError(
          "Expected list for 'dense_features' argument to "
          "'sdca_optimizer' Op, not %r." % dense_features)
    _attr_num_dense_features = len(dense_features)
    if not isinstance(dense_weights, (list, tuple)):
      raise TypeError(
          "Expected list for 'dense_weights' argument to "
          "'sdca_optimizer' Op, not %r." % dense_weights)
    if len(dense_weights) != _attr_num_dense_features:
      raise ValueError(
          "List argument 'dense_weights' to 'sdca_optimizer' Op with length %d "
          "must match length %d of argument 'dense_features'." %
          (len(dense_weights), _attr_num_dense_features))
    loss_type = _execute.make_str(loss_type, "loss_type")
    l1 = _execute.make_float(l1, "l1")
    l2 = _execute.make_float(l2, "l2")
    num_loss_partitions = _execute.make_int(
        num_loss_partitions, "num_loss_partitions")
    num_inner_iterations = _execute.make_int(
        num_inner_iterations, "num_inner_iterations")
    if adaptative is None:
      adaptative = False
    adaptative = _execute.make_bool(adaptative, "adaptative")
    _, _, _op = _op_def_lib._apply_op_helper(
        "SdcaOptimizer", sparse_example_indices=sparse_example_indices,
        sparse_feature_indices=sparse_feature_indices,
        sparse_feature_values=sparse_feature_values,
        dense_features=dense_features, example_weights=example_weights,
        example_labels=example_labels, sparse_indices=sparse_indices,
        sparse_weights=sparse_weights, dense_weights=dense_weights,
        example_state_data=example_state_data, loss_type=loss_type, l1=l1,
        l2=l2, num_loss_partitions=num_loss_partitions,
        num_inner_iterations=num_inner_iterations, adaptative=adaptative,
        name=name)
    _result = _op.outputs[:]
    _inputs_flat = _op.inputs
    _attrs = ("loss_type", _op.get_attr("loss_type"),
              "adaptative", _op.get_attr("adaptative"),
              "num_sparse_features", _op.get_attr("num_sparse_features"),
              "num_sparse_features_with_values",
              _op.get_attr("num_sparse_features_with_values"),
              "num_dense_features", _op.get_attr("num_dense_features"),
              "l1", _op.get_attr("l1"),
              "l2", _op.get_attr("l2"),
              "num_loss_partitions", _op.get_attr("num_loss_partitions"),
              "num_inner_iterations", _op.get_attr("num_inner_iterations"))
    _execute.record_gradient(
        "SdcaOptimizer", _inputs_flat, _attrs, _result, name)
    # Regroup the flat outputs into (state, [sparse deltas], [dense deltas]).
    _result = _result[:1] + [_result[1:1 + _attr_num_sparse_features]] + _result[1 + _attr_num_sparse_features:]
    _result = _result[:2] + [_result[2:]]
    _result = _SdcaOptimizerOutput._make(_result)
    return _result

  else:
    try:
      _result = _pywrap_tensorflow.TFE_Py_FastPathExecute(
          _ctx._handle, _ctx.device_name, "SdcaOptimizer", name,
          _ctx._post_execution_callbacks, sparse_example_indices,
          sparse_feature_indices, sparse_feature_values, dense_features,
          example_weights, example_labels, sparse_indices, sparse_weights,
          dense_weights, example_state_data, "loss_type", loss_type,
          "adaptative", adaptative, "l1", l1, "l2", l2,
          "num_loss_partitions", num_loss_partitions,
          "num_inner_iterations", num_inner_iterations)
      _result = _SdcaOptimizerOutput._make(_result)
      return _result
    except _core._FallbackException:
      return sdca_optimizer_eager_fallback(
          sparse_example_indices, sparse_feature_indices,
          sparse_feature_values, dense_features, example_weights,
          example_labels, sparse_indices, sparse_weights, dense_weights,
          example_state_data, loss_type=loss_type, adaptative=adaptative,
          l1=l1, l2=l2, num_loss_partitions=num_loss_partitions,
          num_inner_iterations=num_inner_iterations, name=name)
    except _core._NotOkStatusException as e:
      # Append the op name (when given) so the failure can be located.
      if name is not None:
        message = e.message + " name: " + name
      else:
        message = e.message
      _six.raise_from(_core._status_to_exception(e.code, message), None)
def sdca_optimizer_eager_fallback(sparse_example_indices,
                                  sparse_feature_indices,
                                  sparse_feature_values, dense_features,
                                  example_weights, example_labels,
                                  sparse_indices, sparse_weights,
                                  dense_weights, example_state_data,
                                  loss_type, l1, l2, num_loss_partitions,
                                  num_inner_iterations, adaptative=False,
                                  name=None):
  r"""Slow-path eager implementation behind `sdca_optimizer`.

  Checks that every list-valued argument is a list or tuple of the expected
  length, converts the attributes and inputs, executes the `SdcaOptimizer` op
  through `_execute.execute`, records the gradient, and regroups the flat
  outputs into (state, [sparse deltas], [dense deltas]).

  Raises:
    TypeError: If a list-valued argument is not a list or tuple.
    ValueError: If the sparse lists do not all have the length of
      `sparse_example_indices`, or `dense_weights` does not have the length
      of `dense_features`.
  """
  _ctx = _context.context()

  # Sparse feature groups: every list must match `sparse_example_indices`.
  if not isinstance(sparse_example_indices, (list, tuple)):
    raise TypeError(
        "Expected list for 'sparse_example_indices' argument to "
        "'sdca_optimizer' Op, not %r." % sparse_example_indices)
  _n_sparse = len(sparse_example_indices)
  if not isinstance(sparse_feature_indices, (list, tuple)):
    raise TypeError(
        "Expected list for 'sparse_feature_indices' argument to "
        "'sdca_optimizer' Op, not %r." % sparse_feature_indices)
  if len(sparse_feature_indices) != _n_sparse:
    raise ValueError(
        "List argument 'sparse_feature_indices' to 'sdca_optimizer' Op with length %d "
        "must match length %d of argument 'sparse_example_indices'." %
        (len(sparse_feature_indices), _n_sparse))
  if not isinstance(sparse_indices, (list, tuple)):
    raise TypeError(
        "Expected list for 'sparse_indices' argument to "
        "'sdca_optimizer' Op, not %r." % sparse_indices)
  if len(sparse_indices) != _n_sparse:
    raise ValueError(
        "List argument 'sparse_indices' to 'sdca_optimizer' Op with length %d "
        "must match length %d of argument 'sparse_example_indices'." %
        (len(sparse_indices), _n_sparse))
  if not isinstance(sparse_weights, (list, tuple)):
    raise TypeError(
        "Expected list for 'sparse_weights' argument to "
        "'sdca_optimizer' Op, not %r." % sparse_weights)
  if len(sparse_weights) != _n_sparse:
    raise ValueError(
        "List argument 'sparse_weights' to 'sdca_optimizer' Op with length %d "
        "must match length %d of argument 'sparse_example_indices'." %
        (len(sparse_weights), _n_sparse))
  if not isinstance(sparse_feature_values, (list, tuple)):
    raise TypeError(
        "Expected list for 'sparse_feature_values' argument to "
        "'sdca_optimizer' Op, not %r." % sparse_feature_values)
  _n_sparse_with_values = len(sparse_feature_values)

  # Dense feature groups: `dense_weights` must match `dense_features`.
  if not isinstance(dense_features, (list, tuple)):
    raise TypeError(
        "Expected list for 'dense_features' argument to "
        "'sdca_optimizer' Op, not %r." % dense_features)
  _n_dense = len(dense_features)
  if not isinstance(dense_weights, (list, tuple)):
    raise TypeError(
        "Expected list for 'dense_weights' argument to "
        "'sdca_optimizer' Op, not %r." % dense_weights)
  if len(dense_weights) != _n_dense:
    raise ValueError(
        "List argument 'dense_weights' to 'sdca_optimizer' Op with length %d "
        "must match length %d of argument 'dense_features'." %
        (len(dense_weights), _n_dense))

  # Scalar attributes.
  loss_type = _execute.make_str(loss_type, "loss_type")
  l1 = _execute.make_float(l1, "l1")
  l2 = _execute.make_float(l2, "l2")
  num_loss_partitions = _execute.make_int(
      num_loss_partitions, "num_loss_partitions")
  num_inner_iterations = _execute.make_int(
      num_inner_iterations, "num_inner_iterations")
  adaptative = _execute.make_bool(
      False if adaptative is None else adaptative, "adaptative")

  # Tensor inputs.
  sparse_example_indices = _ops.convert_n_to_tensor(
      sparse_example_indices, _dtypes.int64)
  sparse_feature_indices = _ops.convert_n_to_tensor(
      sparse_feature_indices, _dtypes.int64)
  sparse_feature_values = _ops.convert_n_to_tensor(
      sparse_feature_values, _dtypes.float32)
  dense_features = _ops.convert_n_to_tensor(dense_features, _dtypes.float32)
  example_weights = _ops.convert_to_tensor(example_weights, _dtypes.float32)
  example_labels = _ops.convert_to_tensor(example_labels, _dtypes.float32)
  sparse_indices = _ops.convert_n_to_tensor(sparse_indices, _dtypes.int64)
  sparse_weights = _ops.convert_n_to_tensor(sparse_weights, _dtypes.float32)
  dense_weights = _ops.convert_n_to_tensor(dense_weights, _dtypes.float32)
  example_state_data = _ops.convert_to_tensor(
      example_state_data, _dtypes.float32)

  # Flatten the inputs in the op's declared argument order.
  _flat_inputs = list(sparse_example_indices)
  _flat_inputs.extend(sparse_feature_indices)
  _flat_inputs.extend(sparse_feature_values)
  _flat_inputs.extend(dense_features)
  _flat_inputs.extend([example_weights, example_labels])
  _flat_inputs.extend(sparse_indices)
  _flat_inputs.extend(sparse_weights)
  _flat_inputs.extend(dense_weights)
  _flat_inputs.append(example_state_data)

  _attr_pairs = (
      "loss_type", loss_type,
      "adaptative", adaptative,
      "num_sparse_features", _n_sparse,
      "num_sparse_features_with_values", _n_sparse_with_values,
      "num_dense_features", _n_dense,
      "l1", l1,
      "l2", l2,
      "num_loss_partitions", num_loss_partitions,
      "num_inner_iterations", num_inner_iterations)
  _outputs = _execute.execute(
      b"SdcaOptimizer", _n_sparse + _n_dense + 1, inputs=_flat_inputs,
      attrs=_attr_pairs, ctx=_ctx, name=name)
  _execute.record_gradient(
      "SdcaOptimizer", _flat_inputs, _attr_pairs, _outputs, name)

  # Regroup the flat outputs: the state tensor, then the per-sparse-group
  # deltas as one list, then the remaining (dense) deltas as one list.
  _state = _outputs[:1]
  _sparse_deltas = _outputs[1:1 + _n_sparse]
  _dense_deltas = _outputs[1 + _n_sparse:]
  return _SdcaOptimizerOutput._make(
      _state + [_sparse_deltas] + [_dense_deltas])
def single_image_random_dot_stereograms(depth_values, hidden_surface_removal=True, convergence_dots_size=8, dots_per_inch=72, eye_separation=2.5, mu=0.3333, normalize=True, normalize_max=-100, normalize_min=100, border_level=0, number_colors=256, output_image_shape=[1024, 768, 1], output_data_window=[1022, 757], name=None):
  r"""Outputs a single image random dot stereogram for export via encode_PNG/JPG OP.

  Given the 2-D tensor 'depth_values' with encoded Z values, this operation
  will encode 3-D data into a 2-D image.  The output of this Op is suitable
  for the encode_PNG/JPG ops.  Be careful with image compression as this may
  corrupt the encoded 3-D data within the image.

  This Op is based upon:
  'http://www.learningace.com/doc/4331582/b6ab058d1e206d68ab60e4e1ead2fe6e/sirds-paper'

  Example use which outputs a SIRDS image as picture_out.png:

  ```python
  img=[[1,2,3,3,2,1],
       [1,2,3,4,5,2],
       [1,2,3,4,5,3],
       [1,2,3,4,5,4],
       [6,5,4,4,5,5]]
  session = tf.InteractiveSession()
  sirds = single_image_random_dot_stereograms(
      img, convergence_dots_size=8, number_colors=256, normalize=True)

  out = sirds.eval()
  png = tf.image.encode_png(out).eval()
  with open('picture_out.png', 'wb') as f:
    f.write(png)
  ```

  Args:
    depth_values: A `Tensor`. Must be one of the following types: `float64`,
      `float32`, `int64`, `int32`. Z values of data to encode into
      'output_data_window' window, lower values are further away
      {0.0 floor(far), 1.0 ceiling(near) after normalization}, must be a 2-D
      tensor.
    hidden_surface_removal: An optional `bool`. Defaults to `True`.
      Activate hidden surface removal.
    convergence_dots_size: An optional `int`. Defaults to `8`.
      Black dot size in pixels to help view converge image, drawn on bottom
      of image.
    dots_per_inch: An optional `int`. Defaults to `72`.
      Output device in dots/inch.
    eye_separation: An optional `float`. Defaults to `2.5`.
      Separation between eyes in inches.
    mu: An optional `float`. Defaults to `0.3333`.
      Depth of field, fraction of viewing distance (eg. 1/3 = .3333).
    normalize: An optional `bool`. Defaults to `True`.
      Normalize input data to [0.0, 1.0].
    normalize_max: An optional `float`. Defaults to `-100`.
      Fix MAX value for Normalization - if < MIN, autoscale.
    normalize_min: An optional `float`. Defaults to `100`.
      Fix MIN value for Normalization - if > MAX, autoscale.
    border_level: An optional `float`. Defaults to `0`.
      Value of border depth 0.0 {far} to 1.0 {near}.
    number_colors: An optional `int`. Defaults to `256`.
      2 (Black & White), 256 (grayscale), and Numbers > 256 (Full Color) are
      all that are supported currently.
    output_image_shape: An optional `tf.TensorShape` or list of `ints`.
      Defaults to `[1024, 768, 1]`. Output size of returned image in X, Y,
      Channels 1-grayscale, 3 color (1024, 768, 1); channels will be updated
      to 3 if 'number_colors' > 256.
    output_data_window: An optional `tf.TensorShape` or list of `ints`.
      Defaults to `[1022, 757]`. Size of "DATA" window, must be equal to or
      smaller than 'output_image_shape'; will be centered and use
      'convergence_dots_size' for best fit to avoid overlap if possible.
    name: A name for the operation (optional).

  Returns:
    A tensor of size 'output_image_shape' with the encoded 'depth_values'
  """
  ctx = _context._context or _context.context()
  if ctx is not None and ctx._thread_local_data.is_eager:
    # Keyword arguments shared by the eager fallback and the dispatcher; they
    # carry the caller's raw (unconverted) attribute values.
    eager_kwargs = dict(
        hidden_surface_removal=hidden_surface_removal,
        convergence_dots_size=convergence_dots_size,
        dots_per_inch=dots_per_inch,
        eye_separation=eye_separation,
        mu=mu,
        normalize=normalize,
        normalize_max=normalize_max,
        normalize_min=normalize_min,
        border_level=border_level,
        number_colors=number_colors,
        output_image_shape=output_image_shape,
        output_data_window=output_data_window,
        name=name)
    try:
      return _pywrap_tensorflow.TFE_Py_FastPathExecute(
          ctx._context_handle, ctx._thread_local_data.device_name,
          "SingleImageRandomDotStereograms", name,
          ctx._post_execution_callbacks, depth_values,
          "hidden_surface_removal", hidden_surface_removal,
          "convergence_dots_size", convergence_dots_size,
          "dots_per_inch", dots_per_inch,
          "eye_separation", eye_separation,
          "mu", mu,
          "normalize", normalize,
          "normalize_max", normalize_max,
          "normalize_min", normalize_min,
          "border_level", border_level,
          "number_colors", number_colors,
          "output_image_shape", output_image_shape,
          "output_data_window", output_data_window)
    except _core._FallbackException:
      try:
        return single_image_random_dot_stereograms_eager_fallback(
            depth_values, ctx=ctx, **eager_kwargs)
      except _core._SymbolicException:
        pass  # Add nodes to the TensorFlow graph.
      except (TypeError, ValueError):
        dispatched = _dispatch.dispatch(
            single_image_random_dot_stereograms,
            depth_values=depth_values, **eager_kwargs)
        if dispatched is not _dispatch.OpDispatcher.NOT_SUPPORTED:
          return dispatched
        raise
    except _core._NotOkStatusException as e:
      message = e.message if name is None else e.message + " name: " + name
      _six.raise_from(_core._status_to_exception(e.code, message), None)

  # Add nodes to the TensorFlow graph.
  # Each attribute falls back to its documented default when None is passed,
  # then is validated/converted by the matching `_execute.make_*` helper.
  hidden_surface_removal = _execute.make_bool(
      True if hidden_surface_removal is None else hidden_surface_removal,
      "hidden_surface_removal")
  convergence_dots_size = _execute.make_int(
      8 if convergence_dots_size is None else convergence_dots_size,
      "convergence_dots_size")
  dots_per_inch = _execute.make_int(
      72 if dots_per_inch is None else dots_per_inch, "dots_per_inch")
  eye_separation = _execute.make_float(
      2.5 if eye_separation is None else eye_separation, "eye_separation")
  mu = _execute.make_float(0.3333 if mu is None else mu, "mu")
  normalize = _execute.make_bool(
      True if normalize is None else normalize, "normalize")
  normalize_max = _execute.make_float(
      -100 if normalize_max is None else normalize_max, "normalize_max")
  normalize_min = _execute.make_float(
      100 if normalize_min is None else normalize_min, "normalize_min")
  border_level = _execute.make_float(
      0 if border_level is None else border_level, "border_level")
  number_colors = _execute.make_int(
      256 if number_colors is None else number_colors, "number_colors")
  output_image_shape = _execute.make_shape(
      [1024, 768, 1] if output_image_shape is None else output_image_shape,
      "output_image_shape")
  output_data_window = _execute.make_shape(
      [1022, 757] if output_data_window is None else output_data_window,
      "output_data_window")

  # The op helper and the dispatcher receive the same converted arguments.
  graph_kwargs = dict(
      depth_values=depth_values,
      hidden_surface_removal=hidden_surface_removal,
      convergence_dots_size=convergence_dots_size,
      dots_per_inch=dots_per_inch,
      eye_separation=eye_separation,
      mu=mu,
      normalize=normalize,
      normalize_max=normalize_max,
      normalize_min=normalize_min,
      border_level=border_level,
      number_colors=number_colors,
      output_image_shape=output_image_shape,
      output_data_window=output_data_window,
      name=name)
  try:
    _, _, op = _op_def_lib._apply_op_helper(
        "SingleImageRandomDotStereograms", **graph_kwargs)
  except (TypeError, ValueError):
    dispatched = _dispatch.dispatch(
        single_image_random_dot_stereograms, **graph_kwargs)
    if dispatched is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return dispatched
    raise

  outputs = op.outputs[:]
  inputs_flat = op.inputs
  # Flat (name, value, name, value, ...) tuple read back from the created op.
  attrs = ()
  for attr_name in ("T", "hidden_surface_removal", "convergence_dots_size",
                    "dots_per_inch", "eye_separation", "mu", "normalize",
                    "normalize_max", "normalize_min", "border_level",
                    "number_colors", "output_image_shape",
                    "output_data_window"):
    attrs += (attr_name, op.get_attr(attr_name))
  _execute.record_gradient(
      "SingleImageRandomDotStereograms", inputs_flat, attrs, outputs, name)
  (output,) = outputs
  return output
def debug_numeric_summary(input, device_name="", tensor_name="", debug_urls=[], lower_bound=float('-inf'), upper_bound=float('inf'), mute_if_healthy=False, gated_grpc=False, name=None):
  r"""Debug Numeric Summary Op.

  Provide a basic summary of numeric value types, range and distribution.

  output: A double tensor of shape [14 + nDimensions], where nDimensions is
    the number of dimensions of the tensor's shape. The elements of output
    are:
    [0]: is initialized (1.0) or not (0.0).
    [1]: total number of elements
    [2]: NaN element count
    [3]: generalized -inf count: elements <= lower_bound. lower_bound is -inf
      by default.
    [4]: negative element count (excluding -inf), if lower_bound is the
      default -inf. Otherwise, this is the count of elements > lower_bound
      and < 0.
    [5]: zero element count
    [6]: positive element count (excluding +inf), if upper_bound is the
      default +inf. Otherwise, this is the count of elements < upper_bound
      and > 0.
    [7]: generalized +inf count, elements >= upper_bound. upper_bound is +inf
      by default.
  Output elements [1:8] are all zero, if the tensor is uninitialized.
    [8]: minimum of all non-inf and non-NaN elements.
         If uninitialized or no such element exists: +inf.
    [9]: maximum of all non-inf and non-NaN elements.
         If uninitialized or no such element exists: -inf.
    [10]: mean of all non-inf and non-NaN elements.
          If uninitialized or no such element exists: NaN.
    [11]: variance of all non-inf and non-NaN elements.
          If uninitialized or no such element exists: NaN.
    [12]: Data type of the tensor encoded as an enum integer. See the
          DataType proto for more details.
    [13]: Number of dimensions of the tensor (ndims).
    [14+]: Sizes of the dimensions.

  Args:
    input: A `Tensor`. Input tensor, non-Reference type.
    device_name: An optional `string`. Defaults to `""`.
    tensor_name: An optional `string`. Defaults to `""`.
      Name of the input tensor.
    debug_urls: An optional list of `strings`. Defaults to `[]`.
      List of URLs to debug targets, e.g., file:///foo/tfdbg_dump,
      grpc://localhost:11011.
    lower_bound: An optional `float`. Defaults to `float('-inf')`.
      (float) The lower bound <= which values will be included in the
      generalized -inf count. Default: -inf.
    upper_bound: An optional `float`. Defaults to `float('inf')`.
      (float) The upper bound >= which values will be included in the
      generalized +inf count. Default: +inf.
    mute_if_healthy: An optional `bool`. Defaults to `False`.
      (bool) Do not send data to the debug URLs unless at least one of
      elements [2], [3] and [7] (i.e., the nan count and the generalized -inf
      and inf counts) is non-zero.
    gated_grpc: An optional `bool`. Defaults to `False`.
      Whether this op will be gated. If any of the debug_urls of this debug
      node is of the grpc:// scheme, when the value of this attribute is set
      to True, the data will not actually be sent via the grpc stream unless
      this debug op has been enabled at the debug_url. If all of the
      debug_urls of this debug node are of the grpc:// scheme and the debug op
      is enabled at none of them, the output will be an empty Tensor.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `float64`.
  """
  ctx = _context._context or _context.context()
  thread_local = ctx._thread_local_data
  if thread_local.is_eager:
    try:
      return pywrap_tfe.TFE_Py_FastPathExecute(
          ctx._context_handle, thread_local.device_name,
          "DebugNumericSummary", name, thread_local.op_callbacks, input,
          "device_name", device_name,
          "tensor_name", tensor_name,
          "debug_urls", debug_urls,
          "lower_bound", lower_bound,
          "upper_bound", upper_bound,
          "mute_if_healthy", mute_if_healthy,
          "gated_grpc", gated_grpc)
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    try:
      return debug_numeric_summary_eager_fallback(
          input,
          device_name=device_name,
          tensor_name=tensor_name,
          debug_urls=debug_urls,
          lower_bound=lower_bound,
          upper_bound=upper_bound,
          mute_if_healthy=mute_if_healthy,
          gated_grpc=gated_grpc,
          name=name,
          ctx=ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.

  # Add nodes to the TensorFlow graph.
  # None is replaced by the documented default before each conversion.
  device_name = _execute.make_str(
      "" if device_name is None else device_name, "device_name")
  tensor_name = _execute.make_str(
      "" if tensor_name is None else tensor_name, "tensor_name")
  if debug_urls is None:
    debug_urls = []
  if not isinstance(debug_urls, (list, tuple)):
    raise TypeError(
        "Expected list for 'debug_urls' argument to "
        "'debug_numeric_summary' Op, not %r." % debug_urls)
  debug_urls = [_execute.make_str(url, "debug_urls") for url in debug_urls]
  lower_bound = _execute.make_float(
      float('-inf') if lower_bound is None else lower_bound, "lower_bound")
  upper_bound = _execute.make_float(
      float('inf') if upper_bound is None else upper_bound, "upper_bound")
  mute_if_healthy = _execute.make_bool(
      False if mute_if_healthy is None else mute_if_healthy,
      "mute_if_healthy")
  gated_grpc = _execute.make_bool(
      False if gated_grpc is None else gated_grpc, "gated_grpc")

  _, _, op, op_outputs = _op_def_library._apply_op_helper(
      "DebugNumericSummary",
      input=input,
      device_name=device_name,
      tensor_name=tensor_name,
      debug_urls=debug_urls,
      lower_bound=lower_bound,
      upper_bound=upper_bound,
      mute_if_healthy=mute_if_healthy,
      gated_grpc=gated_grpc,
      name=name)
  outputs = op_outputs[:]
  if _execute.must_record_gradient():
    # Attribute values are read back from the op only when a gradient record
    # is actually required.
    attrs = (
        "T", op._get_attr_type("T"),
        "device_name", op.get_attr("device_name"),
        "tensor_name", op.get_attr("tensor_name"),
        "debug_urls", op.get_attr("debug_urls"),
        "lower_bound", op.get_attr("lower_bound"),
        "upper_bound", op.get_attr("upper_bound"),
        "mute_if_healthy", op._get_attr_bool("mute_if_healthy"),
        "gated_grpc", op._get_attr_bool("gated_grpc"),
    )
    inputs_flat = op.inputs
    _execute.record_gradient(
        "DebugNumericSummary", inputs_flat, attrs, outputs)
  (output,) = outputs
  return output
def generate_big_query_reader_partitions(project_id, dataset_id, table_id, columns, timestamp_millis, num_partitions, test_end_point="", name=None):
  r"""Generates serialized partition messages suitable for batch reads.

  This op should not be used directly by clients. Instead, the
  bigquery_reader_ops.py file defines a clean interface to the reader.

  Args:
    project_id: A `string`. GCP project ID.
    dataset_id: A `string`. BigQuery Dataset ID.
    table_id: A `string`. Table to read.
    columns: A list of `strings`.
      List of columns to read. Leave empty to read all columns.
    timestamp_millis: An `int`.
      Table snapshot timestamp in millis since epoch. Relative
      (negative or zero) snapshot times are not allowed. For more details, see
      'Table Decorators' in BigQuery docs.
    num_partitions: An `int`. Number of partitions to split the table into.
    test_end_point: An optional `string`. Defaults to `""`.
      Do not use. For testing purposes only.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `string`. Serialized table partitions.

  Raises:
    TypeError: If `columns` is not a list or tuple.
  """
  # Validate / convert every attribute up front, in declaration order.
  project_id = _execute.make_str(project_id, "project_id")
  dataset_id = _execute.make_str(dataset_id, "dataset_id")
  table_id = _execute.make_str(table_id, "table_id")
  if not isinstance(columns, (list, tuple)):
    raise TypeError(
        "Expected list for 'columns' argument to "
        "'generate_big_query_reader_partitions' Op, not %r." % columns)
  columns = [_execute.make_str(column, "columns") for column in columns]
  timestamp_millis = _execute.make_int(timestamp_millis, "timestamp_millis")
  num_partitions = _execute.make_int(num_partitions, "num_partitions")
  test_end_point = _execute.make_str(
      "" if test_end_point is None else test_end_point, "test_end_point")

  ctx = _context.context()
  if not ctx.in_graph_mode():
    # Eager execution: the op takes no tensor inputs, only attributes.
    inputs_flat = []
    attrs = (
        "project_id", project_id,
        "dataset_id", dataset_id,
        "table_id", table_id,
        "columns", columns,
        "timestamp_millis", timestamp_millis,
        "num_partitions", num_partitions,
        "test_end_point", test_end_point,
    )
    outputs = _execute.execute(
        b"GenerateBigQueryReaderPartitions", 1, inputs=inputs_flat,
        attrs=attrs, ctx=ctx, name=name)
  else:
    # Graph mode: create the node, then read the attrs back from it.
    _, _, op = _op_def_lib._apply_op_helper(
        "GenerateBigQueryReaderPartitions",
        project_id=project_id,
        dataset_id=dataset_id,
        table_id=table_id,
        columns=columns,
        timestamp_millis=timestamp_millis,
        num_partitions=num_partitions,
        test_end_point=test_end_point,
        name=name)
    outputs = op.outputs[:]
    inputs_flat = op.inputs
    attrs = ()
    for attr_name in ("project_id", "dataset_id", "table_id", "columns",
                      "timestamp_millis", "num_partitions", "test_end_point"):
      attrs += (attr_name, op.get_attr(attr_name))

  _execute.record_gradient(
      "GenerateBigQueryReaderPartitions", inputs_flat, attrs, outputs, name)
  (output,) = outputs
  return output
def testInt32CPUDefault(self):
  """Adds two integer constants under a '/gpu:0' device scope; expects 3.

  The test is skipped when the eager context reports no GPUs.
  """
  gpu_count = context.context().num_gpus()
  if not gpu_count:
    self.skipTest('No GPUs found')
  with context.device('/gpu:0'):
    lhs = constant_op.constant(1)
    rhs = constant_op.constant(2)
    total = lhs + rhs
  self.assertAllEqual(total, 3)
def ragged_tensor_to_variant(rt_nested_splits, rt_dense_values, batched_input, name=None):
  r"""Encodes a `RaggedTensor` into a `variant` Tensor.

  Encodes the given `RaggedTensor` and returns a `variant` Tensor. If
  `batched_input` is True, then input `RaggedTensor` is unbatched along the
  zero-th dimension, each component `RaggedTensor` is encoded into a scalar
  `variant` Tensor, and these are stacked to return a 1-D `variant` Tensor.
  If `batched_input` is False, then the input `RaggedTensor` is encoded as is
  and a scalar `variant` Tensor is returned. A `RaggedTensor` is encoded by
  first creating a 1-D `variant` Tensor with `ragged_rank + 1` elements,
  containing the splits and values Tensors of the `RaggedTensor`. Then the
  1-D `variant` Tensor is wrapped in a scalar `variant` Tensor. See
  `RaggedTensorFromVariant` for the corresponding decoding logic.

  Args:
    rt_nested_splits: A list of at least 1 `Tensor` objects with the same
      type in: `int32`, `int64`. A list of one or more Tensors representing
      the splits of the input `RaggedTensor`.
    rt_dense_values: A `Tensor`.
      A Tensor representing the values of the input `RaggedTensor`.
    batched_input: A `bool`.
      A `bool` denoting whether the input is a batched `RaggedTensor`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor` of type `variant`.

  Raises:
    TypeError: In graph mode, if `rt_nested_splits` is not a list or tuple.
  """
  ctx = _context._context or _context.context()
  if ctx is not None and ctx._thread_local_data.is_eager:
    try:
      return _pywrap_tensorflow.TFE_Py_FastPathExecute(
          ctx._context_handle, ctx._thread_local_data.device_name,
          "RaggedTensorToVariant", name, ctx._post_execution_callbacks,
          rt_nested_splits, rt_dense_values, "batched_input", batched_input)
    except _core._FallbackException:
      try:
        return ragged_tensor_to_variant_eager_fallback(
            rt_nested_splits, rt_dense_values, batched_input=batched_input,
            name=name, ctx=ctx)
      except _core._SymbolicException:
        pass  # Add nodes to the TensorFlow graph.
    except _core._NotOkStatusException as e:
      message = e.message if name is None else e.message + " name: " + name
      _six.raise_from(_core._status_to_exception(e.code, message), None)

  # Add nodes to the TensorFlow graph.
  if not isinstance(rt_nested_splits, (list, tuple)):
    raise TypeError(
        "Expected list for 'rt_nested_splits' argument to "
        "'ragged_tensor_to_variant' Op, not %r." % rt_nested_splits)
  # Kept from the original body; the value is not read again below.
  _attr_RAGGED_RANK = len(rt_nested_splits)
  batched_input = _execute.make_bool(batched_input, "batched_input")
  _, _, op = _op_def_lib._apply_op_helper(
      "RaggedTensorToVariant",
      rt_nested_splits=rt_nested_splits,
      rt_dense_values=rt_dense_values,
      batched_input=batched_input,
      name=name)
  outputs = op.outputs[:]
  inputs_flat = op.inputs
  # Flat (name, value, ...) tuple read back from the created op.
  attrs = ()
  for attr_name in ("RAGGED_RANK", "Tvalues", "Tsplits", "batched_input"):
    attrs += (attr_name, op.get_attr(attr_name))
  _execute.record_gradient(
      "RaggedTensorToVariant", inputs_flat, attrs, outputs, name)
  (output,) = outputs
  return output
def testNotShareGPU(self):
  """Checks the number of physical GPUs listed by the eager context.

  The expected count is half of the total reported by the test environment.
  """
  visible_gpus = context.context().list_physical_devices("GPU")
  # `total_phsyical_gpus` is spelled exactly as the attribute being read.
  expected_count = combinations.env().total_phsyical_gpus / 2
  self.assertLen(visible_gpus, expected_count)
def copy(input, tensor_name="", debug_ops_spec=[], name=None):
  r"""Copy a tensor from CPU-to-CPU or GPU-to-GPU.

  Performs CPU-to-CPU or GPU-to-GPU deep-copying of tensor, depending on the
  device on which the tensor is allocated.
  N.B.: If all the downstream attached debug ops are disabled given the
  current gRPC gating status, the output will simply forward the input tensor
  without deep-copying. See the documentation of Debug* ops for more details.

  Unlike the CopyHost Op, this op does not have HostMemory constraint on its
  input or output.

  Args:
    input: A `Tensor`. Input tensor.
    tensor_name: An optional `string`. Defaults to `""`.
      The name of the input tensor.
    debug_ops_spec: An optional list of `strings`. Defaults to `[]`.
      A list of debug op spec (op, url, gated_grpc) for attached debug
      ops. Each element of the list has the format
      <debug_op>;<grpc_url>;<gated_grpc>, wherein gated_grpc is boolean
      represented as 0/1. E.g., "DebugIdentity;grpc://foo:3333;1",
      "DebugIdentity;file:///tmp/tfdbg_1;0".
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `input`.

  Raises:
    TypeError: In graph mode, if `debug_ops_spec` is not a list or tuple.
  """
  ctx = _context._context or _context.context()
  thread_local = ctx._thread_local_data
  if thread_local.is_eager:
    try:
      return pywrap_tfe.TFE_Py_FastPathExecute(
          ctx._context_handle, thread_local.device_name, "Copy", name,
          thread_local.op_callbacks, input,
          "tensor_name", tensor_name,
          "debug_ops_spec", debug_ops_spec)
    except _core._NotOkStatusException as e:
      _ops.raise_from_not_ok_status(e, name)
    except _core._FallbackException:
      pass
    try:
      return copy_eager_fallback(
          input, tensor_name=tensor_name, debug_ops_spec=debug_ops_spec,
          name=name, ctx=ctx)
    except _core._SymbolicException:
      pass  # Add nodes to the TensorFlow graph.

  # Add nodes to the TensorFlow graph.
  tensor_name = _execute.make_str(
      "" if tensor_name is None else tensor_name, "tensor_name")
  if debug_ops_spec is None:
    debug_ops_spec = []
  if not isinstance(debug_ops_spec, (list, tuple)):
    raise TypeError(
        "Expected list for 'debug_ops_spec' argument to "
        "'copy' Op, not %r." % debug_ops_spec)
  debug_ops_spec = [
      _execute.make_str(spec, "debug_ops_spec") for spec in debug_ops_spec
  ]
  _, _, op, op_outputs = _op_def_library._apply_op_helper(
      "Copy",
      input=input,
      tensor_name=tensor_name,
      debug_ops_spec=debug_ops_spec,
      name=name)
  outputs = op_outputs[:]
  if _execute.must_record_gradient():
    # Attribute values are read back from the op only when a gradient record
    # is actually required.
    attrs = (
        "T", op._get_attr_type("T"),
        "tensor_name", op.get_attr("tensor_name"),
        "debug_ops_spec", op.get_attr("debug_ops_spec"),
    )
    inputs_flat = op.inputs
    _execute.record_gradient("Copy", inputs_flat, attrs, outputs)
  (output,) = outputs
  return output
def reduce_slice_sum(data, indices, axis, name=None):
  r"""Dynamically sum over the first dimension of a tensor according to start and

  end indices specified at 'indices'.

  For example:

  ```prettyprint
  # if 'data' is [[   1,   2,   3]
                  [  40,  50,  60]
                  [ 700, 800, 900]
                  [1000,2000,3000]],

  and 'indices' is [[0,1]
                    [1,1]
                    [0,2]],

  the output will be [[ 1, 2, 3]
                      [ 0, 0, 0]
                      [41,52,63]].
  ```

  The data must be at least rank 1. The indices must be of shape (?,2) where
  the first column is start indices and the second column is end indices. The
  end indices are not included in the reduce operation, which means, if you
  want to do a reduce over indices 0,1,2, then you should have start index 0
  and end index 3. If end index is smaller than or equal to start, the result
  will be zero. If end index is out of bounds, then the reduce operation will
  automatically stop at the bound, so feel free to put a large number as your
  end of your index if you want to do the reduction until the bound.

  Args:
    data: A `Tensor`. Must be one of the following types: `float32`,
      `float64`, `int32`, `uint8`, `int16`, `int8`, `complex64`, `int64`,
      `qint8`, `quint8`, `qint32`, `bfloat16`, `uint16`, `complex128`, `half`,
      `uint32`, `uint64`.
      The source of data where the computation will be taken from.
    indices: A `Tensor`. Must be one of the following types: `int32`, `int64`.
      start, end indices that controls which part to be included.
    axis: A `Tensor` of type `int64`.
    name: A name for the operation (optional).

  Returns:
    A `Tensor`. Has the same type as `data`. the computed sum values.
  """
  # The dispatcher and the graph op helper take the same keyword arguments.
  op_kwargs = dict(data=data, indices=indices, axis=axis, name=name)

  ctx = _context._context or _context.context()
  if ctx is not None and ctx._thread_local_data.is_eager:
    try:
      return _pywrap_tensorflow.TFE_Py_FastPathExecute(
          ctx._context_handle, ctx._thread_local_data.device_name,
          "ReduceSliceSum", name, ctx.post_execution_callbacks,
          data, indices, axis)
    except _core._FallbackException:
      try:
        return reduce_slice_sum_eager_fallback(
            data, indices, axis, name=name, ctx=ctx)
      except _core._SymbolicException:
        pass  # Add nodes to the TensorFlow graph.
      except (TypeError, ValueError):
        dispatched = _dispatch.dispatch(reduce_slice_sum, **op_kwargs)
        if dispatched is not _dispatch.OpDispatcher.NOT_SUPPORTED:
          return dispatched
        raise
    except _core._NotOkStatusException as e:
      message = e.message if name is None else e.message + " name: " + name
      _six.raise_from(_core._status_to_exception(e.code, message), None)

  # Add nodes to the TensorFlow graph.
  try:
    _, _, op = _op_def_lib._apply_op_helper("ReduceSliceSum", **op_kwargs)
  except (TypeError, ValueError):
    dispatched = _dispatch.dispatch(reduce_slice_sum, **op_kwargs)
    if dispatched is not _dispatch.OpDispatcher.NOT_SUPPORTED:
      return dispatched
    raise

  outputs = op.outputs[:]
  inputs_flat = op.inputs
  attrs = (
      "T", op._get_attr_type("T"),
      "Tindices", op._get_attr_type("Tindices"),
  )
  _execute.record_gradient(
      "ReduceSliceSum", inputs_flat, attrs, outputs, name)
  (output,) = outputs
  return output
def _initialize_multi_worker(self, cluster_resolver):
  """Initializes the object for multi-worker training.

  Reads the cluster spec, task type and task id from `cluster_resolver`,
  records them on `self`, configures collective ops on the eager context when
  executing eagerly (and not in local/standalone client mode), builds the
  cross-device ops, and finally delegates to `_initialize_single_worker` with
  the local device list.

  Args:
    cluster_resolver: Object exposing `cluster_spec()`, `task_type`,
      `task_id`, `rpc_layer` and `num_accelerators()`. If it has a `port`
      attribute, that port is used in the server definition; otherwise 0.

  Raises:
    ValueError: If `task_type` or `task_id` is None, or if the worker count
      computed from the cluster spec is zero.
  """
  # This file also defines a module-level function named `copy`, which
  # shadows the stdlib module of the same name. Import the module locally
  # under an alias so that `deepcopy` below resolves to the stdlib function
  # instead of failing with AttributeError.
  import copy as copy_lib  # pylint: disable=g-import-not-at-top

  cluster_spec = multi_worker_util.normalize_cluster_spec(
      cluster_resolver.cluster_spec())
  task_type = cluster_resolver.task_type
  task_id = cluster_resolver.task_id
  if task_type is None or task_id is None:
    raise ValueError("When `cluster_spec` is given, you must also specify "
                     "`task_type` and `task_id`.")
  self._cluster_spec = cluster_spec
  self._task_type = task_type
  self._task_id = task_id
  self._id_in_cluster = multi_worker_util.id_in_cluster(
      self._cluster_spec, self._task_type, self._task_id)

  self._num_workers = multi_worker_util.worker_count(cluster_spec, task_type)
  if not self._num_workers:
    raise ValueError("No `worker`, `chief` or `evaluator` tasks can be found "
                     "in `cluster_spec`.")

  self._is_chief = multi_worker_util.is_chief(cluster_spec, task_type,
                                              task_id)

  # The same device string is used for the worker device, the collective-op
  # device filter and the default device, so build it once.
  worker_device = "/job:%s/task:%d" % (task_type, task_id)
  self._worker_device = worker_device
  self._host_input_device = numpy_dataset.SingleDevice(self._worker_device)

  if (ops.executing_eagerly_outside_functions() and
      not getattr(self, "_local_or_standalone_client_mode", False)):
    context.context().configure_collective_ops(
        collective_leader=multi_worker_util.collective_leader(
            cluster_spec, task_type, task_id),
        scoped_allocator_enabled_ops=("CollectiveReduce",),
        device_filters=(worker_device,))
    self._collective_ops_configured = True

  # Starting a std server in eager mode and in independent worker mode.
  if (context.executing_eagerly() and
      not getattr(self, "_std_server_started", False) and
      not getattr(self, "_local_or_standalone_client_mode", False)):
    # Checking _local_or_standalone_client_mode as well because we should not
    # create the std server in standalone client mode.
    config_proto = copy_lib.deepcopy(context.context().config)
    config_proto = self._update_config_proto(config_proto)

    if hasattr(cluster_resolver, "port"):
      port = cluster_resolver.port
    else:
      port = 0
    server_def = tensorflow_server_pb2.ServerDef(
        cluster=cluster_spec.as_cluster_def(),
        default_session_config=config_proto,
        job_name=task_type,
        task_index=task_id,
        protocol=cluster_resolver.rpc_layer or "grpc",
        port=port)
    context.context().enable_collective_ops(server_def)
    self._std_server_started = True
    # The `ensure_initialized` is needed before calling
    # `context.context().devices()`.
    context.context().ensure_initialized()
    logging.info(
        "Enabled multi-worker collective ops with available devices: %r",
        context.context().devices())

  # TODO(yuefengz): The `num_gpus` is only for this particular task. It
  # assumes all workers have the same number of GPUs. We should remove this
  # assumption by querying all tasks for their numbers of GPUs.
  # TODO(b/126786766): TFConfigClusterResolver returns wrong number of GPUs in
  # some cases.
  if isinstance(cluster_resolver, TFConfigClusterResolver):
    num_gpus = context.num_gpus()
  else:
    num_gpus = cluster_resolver.num_accelerators().get("GPU", 0)

  if num_gpus:
    local_devices = tuple("%s/device:GPU:%d" % (self._worker_device, i)
                          for i in range(num_gpus))
  else:
    local_devices = (self._worker_device,)

  self._collective_keys = cross_device_utils.CollectiveKeys(
      group_key_start=1 + self._collective_key_base)
  self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
      devices=local_devices,
      group_size=len(local_devices) * self._num_workers,
      collective_keys=self._collective_keys)
  # CrossDeviceOps for per host tensors.
  self._host_cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
      devices=[self._worker_device],
      group_size=self._num_workers,
      collective_keys=self._collective_keys)
  super(CollectiveAllReduceExtended, self)._initialize_single_worker(
      local_devices)

  # Add a default device so that ops without specified devices will not end up
  # on other workers.
  self._default_device = worker_device

  # Save the num_gpus_per_worker and rpc_layer for configure method.
  self._num_gpus_per_worker = num_gpus
  self._rpc_layer = cluster_resolver.rpc_layer
  self._warn_nccl_no_gpu()

  if self._enable_check_health and context.executing_eagerly():
    self._start_check_health_thread()
  else:
    logging.info("Check health not enabled.")

  logging.info(
      "MultiWorkerMirroredStrategy with cluster_spec = %r, task_type = %r, "
      "task_id = %r, num_workers = %r, local_devices = %r, "
      "communication = %s", cluster_spec.as_dict(), task_type, task_id,
      self._num_workers, local_devices,
      self._communication_options.implementation)
def __init__(self, name, read_only_collections=True):
    """Build an empty FuncGraph that inherits state from the default graph.

    The graph key, collections, seed, and distribution strategy stack are
    taken from the current context or graph.

    Args:
      name: the name of the function.
      read_only_collections: whether to not write function graph collections
        back to default graph. Defaults to True.
    """
    super(FuncGraph, self).__init__()

    self.name = name
    self.inputs = []
    self.outputs = []
    self.structured_outputs = None
    self._read_only_collections = read_only_collections
    self._weak_variables = []
    self.outer_graph = ops.get_default_graph()
    self.captures = collections.OrderedDict()
    self._building_function = True

    # Resource tensor name -> last op (in program order) that used the tensor.
    # Used to enforce that execution order matches program order for resource
    # tensors.
    self._last_op_using_resource_tensor = {}

    outer = self.outer_graph

    # pylint: disable=protected-access
    # TODO(b/112906995, nareshmodi): distribution strategy depends on
    # inheriting this stack from the default graph even in eager mode. Maybe it
    # should be part of the eager context? This would also allow us to remove a
    # get_default_graph() call from the function cache lookup.
    self._distribution_strategy_stack = outer._distribution_strategy_stack

    # Device placements from outer scopes are ignored while tracing whenever
    # possible, so they are not hard-coded into the function graph. "Default"
    # placements come from the PartitionedCallOp's placement, which lets one
    # trace of the Python function be placed on several different devices and
    # lets saved functions be placed on new devices when restored.
    if context.executing_eagerly():
        self.seed = context.global_seed()
        self._xla_compile = (
            context.context().device_spec.device_type == "TPU")
        if self._distribution_strategy_stack or self._xla_compile:
            self._add_device_to_stack(context.context().device_name)
    else:
        self.seed = outer.seed
        self._xla_compile = getattr(outer, "_xla_compile", False)
        # TODO(allenl): Figure out if we can remove colocation stack
        # specialization (currently used in cond_v2), here and in the cache
        # key.
        self._colocation_stack = outer._colocation_stack.copy()
        needs_device_stack = (
            self._distribution_strategy_stack
            or self._xla_compile
            or device_stack_has_callable(outer._device_function_stack))
        if needs_device_stack:
            # Hard-code devices from device functions in the function body.
            self._device_function_stack = outer._device_function_stack.copy()

    if self._read_only_collections:
        # Non-whitelisted collections are fetched with get_collection();
        # whitelisted ones are fetched with get_collection_ref().
        for key in outer.get_all_collection_keys():
            if key in WHITELIST_COLLECTIONS:
                continue
            self._collections[key] = outer.get_collection(key)
        for key in WHITELIST_COLLECTIONS:
            self._collections[key] = outer.get_collection_ref(key)
    else:
        # Reuse the outer graph's collections object directly.
        self._collections = outer._collections

    self._variable_creator_stack = outer._variable_creator_stack
    # Inherit the graph key, since this is used for matching variables in
    # optimizers.
    self._graph_key = outer._graph_key
    # pylint: enable=protected-access
def execute(op_name, num_outputs, inputs, attrs=None):
    """Forward to `execute_lib.execute`, supplying the current context.

    Args:
      op_name: passed through unchanged as the first positional argument.
      num_outputs: passed through unchanged.
      inputs: passed through unchanged.
      attrs: passed through unchanged; defaults to None.

    Returns:
      Whatever `execute_lib.execute` returns.
    """
    run = execute_lib.execute
    return run(op_name, num_outputs, inputs, attrs, context.context())
def roll(input, shift, axis, name=None):
    r"""Rolls the elements of a tensor along an axis.

    Elements are moved towards larger indices by `shift` positions along the
    dimension `axis`; a negative `shift` moves them in the opposite direction.
    Elements rolled past the last position wrap around to the first, and vice
    versa. Several shifts along several axes may be given at once.

    For example:

    ```
    # 't' is [0, 1, 2, 3, 4]
    roll(t, shift=2, axis=0) ==> [3, 4, 0, 1, 2]

    # shifting along multiple dimensions
    # 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
    roll(t, shift=[1, -2], axis=[0, 1]) ==> [[7, 8, 9, 5, 6], [2, 3, 4, 0, 1]]

    # shifting along the same axis multiple times
    # 't' is [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
    roll(t, shift=[2, -3], axis=[1, 1]) ==> [[1, 2, 3, 4, 0], [6, 7, 8, 9, 5]]
    ```

    Args:
      input: A `Tensor`.
      shift: A `Tensor`. Must be one of the following types: `int32`, `int64`.
        Dimension must be 0-D or 1-D. `shift[i]` is the number of places by
        which elements are shifted positively (towards larger indices) along
        the dimension given by `axis[i]`. Negative shifts roll the elements in
        the opposite direction.
      axis: A `Tensor`. Must be one of the following types: `int32`, `int64`.
        Dimension must be 0-D or 1-D. `axis[i]` is the dimension along which
        the shift `shift[i]` occurs. If the same axis is referenced more than
        once, the total shift for that axis is the sum of all the shifts that
        belong to it.
      name: A name for the operation (optional).

    Returns:
      A `Tensor`. Has the same type as `input`.
    """
    ctx = _context._context or _context.context()
    thread_data = ctx._thread_local_data
    if thread_data.is_eager:
        # Eager fast path first; on a fallback signal try the Python-level
        # eager fallback before building graph nodes.
        try:
            return pywrap_tfe.TFE_Py_FastPathExecute(
                ctx, "Roll", name, input, shift, axis)
        except _core._NotOkStatusException as status_error:
            _ops.raise_from_not_ok_status(status_error, name)
        except _core._FallbackException:
            pass
        try:
            return roll_eager_fallback(
                input, shift, axis, name=name, ctx=ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.

    # Add nodes to the TensorFlow graph.
    _, _, op, op_outputs = _op_def_library._apply_op_helper(
        "Roll", input=input, shift=shift, axis=axis, name=name)
    results = op_outputs[:]
    if _execute.must_record_gradient():
        attrs = (
            "T", op._get_attr_type("T"),
            "Tshift", op._get_attr_type("Tshift"),
            "Taxis", op._get_attr_type("Taxis"),
        )
        inputs_flat = op.inputs
        _execute.record_gradient("Roll", inputs_flat, attrs, results)
    # The op has exactly one output; unpacking enforces that.
    (rolled,) = results
    return rolled
def sparse_feature_cross(indices, values, shapes, dense, hashed_output,
                         num_buckets, out_type, internal_type, name=None):
    r"""Generates sparse cross from a list of sparse tensors.

    The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`,
    each representing features of one feature column. It outputs a 2D
    `SparseTensor` with the batchwise crosses of these features.

    For example, if the inputs are

        inputs[0]: SparseTensor with shape = [2, 2]
        [0, 0]: "a"
        [1, 0]: "b"
        [1, 1]: "c"

        inputs[1]: SparseTensor with shape = [2, 1]
        [0, 0]: "d"
        [1, 0]: "e"

        inputs[2]: Tensor [["f"], ["g"]]

    then the output will be

        shape = [2, 2]
        [0, 0]: "a_X_d_X_f"
        [1, 0]: "b_X_e_X_g"
        [1, 1]: "c_X_e_X_g"

    if hashed_output=true then the output will be

        shape = [2, 2]
        [0, 0]: HashCombine(
                    Fingerprint64("f"), HashCombine(
                        Fingerprint64("d"), Fingerprint64("a")))
        [1, 0]: HashCombine(
                    Fingerprint64("g"), HashCombine(
                        Fingerprint64("e"), Fingerprint64("b")))
        [1, 1]: HashCombine(
                    Fingerprint64("g"), HashCombine(
                        Fingerprint64("e"), Fingerprint64("c")))

    Args:
      indices: A list of `Tensor` objects with type `int64`.
        2-D. Indices of each input `SparseTensor`.
      values: A list of `Tensor` objects with types from: `int64`, `string`.
        1-D. Values of each `SparseTensor`.
      shapes: A list with the same length as `indices` of `Tensor` objects
        with type `int64`. 1-D. Shapes of each `SparseTensor`.
      dense: A list of `Tensor` objects with types from: `int64`, `string`.
        2-D. Columns represented by dense `Tensor`.
      hashed_output: A `bool`.
      num_buckets: An `int` that is `>= 0`.
      out_type: A `tf.DType` from: `tf.int64, tf.string`.
      internal_type: A `tf.DType` from: `tf.int64, tf.string`.
      name: A name for the operation (optional).

    Returns:
      A tuple of `Tensor` objects
      (output_indices, output_values, output_shape).

      output_indices: A `Tensor` of type `int64`. 2-D. Indices of the
        concatenated `SparseTensor`.
      output_values: A `Tensor` of type `out_type`. 1-D. Non-empty values of
        the concatenated or hashed `SparseTensor`.
      output_shape: A `Tensor` of type `int64`. 1-D. Shape of the concatenated
        `SparseTensor`.

    Raises:
      TypeError: in graph mode, if `indices` or `shapes` is not a list/tuple.
      ValueError: in graph mode, if `shapes` and `indices` differ in length.
    """
    ctx = _context._context or _context.context()
    if ctx is not None and ctx._thread_local_data.is_eager:
        try:
            fast_outputs = _pywrap_tensorflow.TFE_Py_FastPathExecute(
                ctx._context_handle, ctx._thread_local_data.device_name,
                "SparseFeatureCross", name, ctx._post_execution_callbacks,
                indices, values, shapes, dense,
                "hashed_output", hashed_output,
                "num_buckets", num_buckets,
                "out_type", out_type,
                "internal_type", internal_type)
            return _SparseFeatureCrossOutput._make(fast_outputs)
        except _core._FallbackException:
            try:
                return sparse_feature_cross_eager_fallback(
                    indices, values, shapes, dense,
                    hashed_output=hashed_output, num_buckets=num_buckets,
                    out_type=out_type, internal_type=internal_type,
                    name=name, ctx=ctx)
            except _core._SymbolicException:
                pass  # Add nodes to the TensorFlow graph.
            except (TypeError, ValueError):
                # Give registered dispatchers a chance before re-raising.
                dispatched = _dispatch.dispatch(
                    sparse_feature_cross, indices=indices, values=values,
                    shapes=shapes, dense=dense, hashed_output=hashed_output,
                    num_buckets=num_buckets, out_type=out_type,
                    internal_type=internal_type, name=name)
                if dispatched is _dispatch.OpDispatcher.NOT_SUPPORTED:
                    raise
                return dispatched
        except _core._NotOkStatusException as status_error:
            message = status_error.message
            if name is not None:
                message = message + " name: " + name
            _six.raise_from(
                _core._status_to_exception(status_error.code, message), None)

    # Add nodes to the TensorFlow graph.
    if not isinstance(indices, (list, tuple)):
        raise TypeError(
            "Expected list for 'indices' argument to "
            "'sparse_feature_cross' Op, not %r." % indices)
    num_sparse = len(indices)
    if not isinstance(shapes, (list, tuple)):
        raise TypeError(
            "Expected list for 'shapes' argument to "
            "'sparse_feature_cross' Op, not %r." % shapes)
    if len(shapes) != num_sparse:
        raise ValueError(
            "List argument 'shapes' to 'sparse_feature_cross' Op with "
            "length %d must match length %d of argument 'indices'."
            % (len(shapes), num_sparse))

    # Coerce attrs to their declared types before building the op.
    hashed_output = _execute.make_bool(hashed_output, "hashed_output")
    num_buckets = _execute.make_int(num_buckets, "num_buckets")
    out_type = _execute.make_type(out_type, "out_type")
    internal_type = _execute.make_type(internal_type, "internal_type")

    try:
        _, _, op = _op_def_lib._apply_op_helper(
            "SparseFeatureCross", indices=indices, values=values,
            shapes=shapes, dense=dense, hashed_output=hashed_output,
            num_buckets=num_buckets, out_type=out_type,
            internal_type=internal_type, name=name)
    except (TypeError, ValueError):
        dispatched = _dispatch.dispatch(
            sparse_feature_cross, indices=indices, values=values,
            shapes=shapes, dense=dense, hashed_output=hashed_output,
            num_buckets=num_buckets, out_type=out_type,
            internal_type=internal_type, name=name)
        if dispatched is _dispatch.OpDispatcher.NOT_SUPPORTED:
            raise
        return dispatched

    outputs = op.outputs[:]
    inputs_flat = op.inputs
    attrs = (
        "N", op.get_attr("N"),
        "hashed_output", op.get_attr("hashed_output"),
        "num_buckets", op.get_attr("num_buckets"),
        "sparse_types", op.get_attr("sparse_types"),
        "dense_types", op.get_attr("dense_types"),
        "out_type", op.get_attr("out_type"),
        "internal_type", op.get_attr("internal_type"),
    )
    _execute.record_gradient(
        "SparseFeatureCross", inputs_flat, attrs, outputs, name)
    return _SparseFeatureCrossOutput._make(outputs)
def _initialize_local(self, cluster_resolver, devices=None):
    """Set this object up for local (single-worker) training.

    Args:
      cluster_resolver: resolver consulted for the GPU count (unless it is a
        `TFConfigClusterResolver`) and for `rpc_layer`.
      devices: optional explicit local devices. When falsy, the devices are
        derived from the detected GPU count, falling back to the CPU.
    """
    self._is_chief = True
    self._num_workers = 1

    if ops.executing_eagerly_outside_functions():
        try:
            context.context().configure_collective_ops(
                scoped_allocator_enabled_ops=("CollectiveReduce",))
        except RuntimeError:
            logging.warning(
                "Collective ops is not configured at program startup. "
                "Some performance features may not be enabled.")
        # Set whether or not the configuration attempt above succeeded.
        self._collective_ops_configured = True

    # TODO(b/126786766): TFConfigClusterResolver returns wrong number of GPUs
    # in some cases.
    if isinstance(cluster_resolver, TFConfigClusterResolver):
        gpu_count = context.num_gpus()
    else:
        gpu_count = cluster_resolver.num_accelerators().get("GPU", 0)

    if devices:
        local_devices = devices
    elif gpu_count:
        local_devices = tuple(
            "/device:GPU:%d" % gpu_index for gpu_index in range(gpu_count))
    else:
        local_devices = ("/device:CPU:0",)

    self._worker_device = device_util.canonicalize("/device:CPU:0")
    self._host_input_device = numpy_dataset.SingleDevice(self._worker_device)

    self._collective_keys = cross_device_utils.CollectiveKeys(
        group_key_start=1 + self._collective_key_base)
    self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
        devices=local_devices,
        group_size=len(local_devices),
        collective_keys=self._collective_keys)
    # CrossDeviceOps for per host tensors.
    self._host_cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
        devices=[self._worker_device],
        group_size=self._num_workers,
        collective_keys=self._collective_keys)

    super(CollectiveAllReduceExtended, self)._initialize_single_worker(
        local_devices)

    # No cluster information in local mode.
    self._cluster_spec = None
    self._task_type = None
    self._task_id = None
    self._id_in_cluster = 0

    # This is a mark to tell whether we are running with standalone client or
    # independent worker. Right now with standalone client, strategy object is
    # created as local strategy and then turn into multi-worker strategy via
    # configure call.
    self._local_or_standalone_client_mode = True

    # Save the num_gpus_per_worker and rpc_layer for configure method.
    self._num_gpus_per_worker = gpu_count
    self._rpc_layer = cluster_resolver.rpc_layer

    self._warn_nccl_no_gpu()

    logging.info(
        "Single-worker MultiWorkerMirroredStrategy with local_devices "
        "= %r, communication = %s",
        local_devices, self._communication_options.implementation)
def ragged_range(starts, limits, deltas, Tsplits=_dtypes.int64, name=None):
    r"""Returns a `RaggedTensor` containing the specified sequences of numbers.

    Returns a `RaggedTensor` `result` composed from `rt_dense_values` and
    `rt_nested_splits`, such that
    `result[i] = range(starts[i], limits[i], deltas[i])`.

    ```python
    >>> (rt_nested_splits, rt_dense_values) = gen_ragged_ops.ragged_range(
    ...     starts=[2, 5, 8], limits=[3, 5, 12], deltas=1)
    >>> result = ragged.from_nested_row_splits(rt_dense_values, rt_nested_splits)
    >>> print result.eval().tolist()
    [[2],               # result[0] = range(2, 3)
     [],                # result[1] = range(5, 5)
     [8, 9, 10, 11]]    # result[2] = range(8, 12)
    ```

    The input tensors `starts`, `limits`, and `deltas` may be scalars or
    vectors. The vector inputs must all have the same size. Scalar inputs are
    broadcast to match the size of the vector inputs.

    Args:
      starts: A `Tensor`. Must be one of the following types: `bfloat16`,
        `float32`, `float64`, `int32`, `int64`. The starts of each range.
      limits: A `Tensor`. Must have the same type as `starts`.
        The limits of each range.
      deltas: A `Tensor`. Must have the same type as `starts`.
        The deltas of each range.
      Tsplits: An optional `tf.DType` from: `tf.int32, tf.int64`.
        Defaults to `tf.int64`.
      name: A name for the operation (optional).

    Returns:
      A tuple of `Tensor` objects (rt_nested_splits, rt_dense_values).

      rt_nested_splits: A `Tensor` of type `Tsplits`.
      rt_dense_values: A `Tensor`. Has the same type as `starts`.
    """
    ctx = _context._context or _context.context()
    if ctx is not None and ctx._thread_local_data.is_eager:
        try:
            fast_outputs = _pywrap_tensorflow.TFE_Py_FastPathExecute(
                ctx._context_handle, ctx._thread_local_data.device_name,
                "RaggedRange", name, ctx._post_execution_callbacks,
                starts, limits, deltas,
                "Tsplits", Tsplits)
            return _RaggedRangeOutput._make(fast_outputs)
        except _core._FallbackException:
            try:
                return ragged_range_eager_fallback(
                    starts, limits, deltas,
                    Tsplits=Tsplits, name=name, ctx=ctx)
            except _core._SymbolicException:
                pass  # Add nodes to the TensorFlow graph.
        except _core._NotOkStatusException as status_error:
            message = status_error.message
            if name is not None:
                message = message + " name: " + name
            _six.raise_from(
                _core._status_to_exception(status_error.code, message), None)

    # Add nodes to the TensorFlow graph.
    Tsplits = _execute.make_type(
        _dtypes.int64 if Tsplits is None else Tsplits, "Tsplits")
    _, _, op = _op_def_lib._apply_op_helper(
        "RaggedRange", starts=starts, limits=limits, deltas=deltas,
        Tsplits=Tsplits, name=name)
    outputs = op.outputs[:]
    inputs_flat = op.inputs
    attrs = (
        "T", op._get_attr_type("T"),
        "Tsplits", op._get_attr_type("Tsplits"),
    )
    _execute.record_gradient(
        "RaggedRange", inputs_flat, attrs, outputs, name)
    return _RaggedRangeOutput._make(outputs)
def ragged_tensor_from_variant(encoded_ragged, input_ragged_rank,
                               output_ragged_rank, Tvalues, Tsplits,
                               name=None):
    r"""Decodes a `variant` Tensor into a `RaggedTensor`.

    Decodes the given `variant` Tensor and returns a `RaggedTensor`. The input
    could be a scalar, meaning it encodes a single `RaggedTensor` with
    ragged_rank `output_ragged_rank`. It could also have an arbitrary rank, in
    which case each element is decoded into a `RaggedTensor` with ragged_rank
    `input_ragged_rank` and these are then stacked according to the input
    shape to output a single `RaggedTensor` with ragged_rank
    `output_ragged_rank`.

    Each `variant` element in the input Tensor is decoded by retrieving from
    the element a 1-D `variant` Tensor with `input_ragged_rank + 1` Tensors,
    corresponding to the splits and values of the decoded `RaggedTensor`. If
    `input_ragged_rank` is -1, then it is inferred as
    `output_ragged_rank` - `rank(encoded_ragged)`. See `RaggedTensorToVariant`
    for the corresponding encoding logic.

    Args:
      encoded_ragged: A `Tensor` of type `variant`.
        A `variant` Tensor containing encoded `RaggedTensor`s.
      input_ragged_rank: An `int` that is `>= -1`.
        The ragged rank of each encoded `RaggedTensor` component in the input.
        If set to -1, this is inferred as
        `output_ragged_rank` - `rank(encoded_ragged)`.
      output_ragged_rank: An `int` that is `>= 1`.
        The expected ragged rank of the output `RaggedTensor`. The following
        must hold:
        `output_ragged_rank = rank(encoded_ragged) + input_ragged_rank`.
      Tvalues: A `tf.DType`.
      Tsplits: A `tf.DType` from: `tf.int32, tf.int64`.
      name: A name for the operation (optional).

    Returns:
      A tuple of `Tensor` objects (output_nested_splits, output_dense_values).

      output_nested_splits: A list of `output_ragged_rank` `Tensor` objects
        with type `Tsplits`.
      output_dense_values: A `Tensor` of type `Tvalues`.
    """
    ctx = _context._context or _context.context()
    if ctx is not None and ctx._thread_local_data.is_eager:
        try:
            fast_outputs = _pywrap_tensorflow.TFE_Py_FastPathExecute(
                ctx._context_handle, ctx._thread_local_data.device_name,
                "RaggedTensorFromVariant", name,
                ctx._post_execution_callbacks, encoded_ragged,
                "input_ragged_rank", input_ragged_rank,
                "output_ragged_rank", output_ragged_rank,
                "Tvalues", Tvalues,
                "Tsplits", Tsplits)
            return _RaggedTensorFromVariantOutput._make(fast_outputs)
        except _core._FallbackException:
            try:
                return ragged_tensor_from_variant_eager_fallback(
                    encoded_ragged, input_ragged_rank=input_ragged_rank,
                    output_ragged_rank=output_ragged_rank, Tvalues=Tvalues,
                    Tsplits=Tsplits, name=name, ctx=ctx)
            except _core._SymbolicException:
                pass  # Add nodes to the TensorFlow graph.
        except _core._NotOkStatusException as status_error:
            message = status_error.message
            if name is not None:
                message = message + " name: " + name
            _six.raise_from(
                _core._status_to_exception(status_error.code, message), None)

    # Add nodes to the TensorFlow graph.
    input_ragged_rank = _execute.make_int(
        input_ragged_rank, "input_ragged_rank")
    output_ragged_rank = _execute.make_int(
        output_ragged_rank, "output_ragged_rank")
    Tvalues = _execute.make_type(Tvalues, "Tvalues")
    Tsplits = _execute.make_type(Tsplits, "Tsplits")
    _, _, op = _op_def_lib._apply_op_helper(
        "RaggedTensorFromVariant", encoded_ragged=encoded_ragged,
        input_ragged_rank=input_ragged_rank,
        output_ragged_rank=output_ragged_rank,
        Tvalues=Tvalues, Tsplits=Tsplits, name=name)
    flat_outputs = op.outputs[:]
    inputs_flat = op.inputs
    attrs = (
        "input_ragged_rank", op.get_attr("input_ragged_rank"),
        "output_ragged_rank", op.get_attr("output_ragged_rank"),
        "Tvalues", op.get_attr("Tvalues"),
        "Tsplits", op.get_attr("Tsplits"),
    )
    _execute.record_gradient(
        "RaggedTensorFromVariant", inputs_flat, attrs, flat_outputs, name)
    # Group the first `output_ragged_rank` outputs into one list (the nested
    # splits); the remaining outputs follow as separate elements.
    nested_splits = flat_outputs[:output_ragged_rank]
    remaining = flat_outputs[output_ragged_rank:]
    return _RaggedTensorFromVariantOutput._make([nested_splits] + remaining)
def debug_identity_v2(input, tfdbg_context_id="", op_name="", output_slot=-1,
                      tensor_debug_mode=-1, debug_urls=[],
                      circular_buffer_size=1000, tfdbg_run_id="", name=None):
    r"""Debug Identity V2 Op.

    Provides an identity mapping from input to output, while writing the
    content of the input tensor by calling DebugEventsWriter.

    The semantics of the input tensor depends on tensor_debug_mode. In typical
    usage, the input tensor comes directly from the user computation only when
    graph_debug_mode is FULL_TENSOR (see protobuf/debug_event.proto for a list
    of all the possible values of graph_debug_mode). For the other debug
    modes, the input tensor should be produced by an additional op or subgraph
    that computes summary information about one or more tensors.

    Args:
      input: A `Tensor`. Input tensor, non-Reference type.
      tfdbg_context_id: An optional `string`. Defaults to `""`.
        A tfdbg-generated ID for the context that the op belongs to,
        e.g., a concrete compiled tf.function.
      op_name: An optional `string`. Defaults to `""`.
        Optional. Name of the op that the debug op is concerned with.
        Used only for single-tensor trace.
      output_slot: An optional `int`. Defaults to `-1`.
        Optional. Output slot index of the tensor that the debug op
        is concerned with. Used only for single-tensor trace.
      tensor_debug_mode: An optional `int`. Defaults to `-1`.
        TensorDebugMode enum value. See debug_event.proto for details.
      debug_urls: An optional list of `strings`. Defaults to `[]`.
        List of URLs to debug targets, e.g., file:///foo/tfdbg_dump.
      circular_buffer_size: An optional `int`. Defaults to `1000`.
      tfdbg_run_id: An optional `string`. Defaults to `""`.
      name: A name for the operation (optional).

    Returns:
      A `Tensor`. Has the same type as `input`.

    Raises:
      TypeError: in graph mode, if `debug_urls` is not a list or tuple.
    """
    # NOTE: the list default for `debug_urls` is only rebound in this body,
    # never mutated in place.
    ctx = _context._context or _context.context()
    thread_data = ctx._thread_local_data
    if thread_data.is_eager:
        try:
            return pywrap_tfe.TFE_Py_FastPathExecute(
                ctx._context_handle, thread_data.device_name,
                "DebugIdentityV2", name, thread_data.op_callbacks, input,
                "tfdbg_context_id", tfdbg_context_id,
                "op_name", op_name,
                "output_slot", output_slot,
                "tensor_debug_mode", tensor_debug_mode,
                "debug_urls", debug_urls,
                "circular_buffer_size", circular_buffer_size,
                "tfdbg_run_id", tfdbg_run_id)
        except _core._NotOkStatusException as status_error:
            _ops.raise_from_not_ok_status(status_error, name)
        except _core._FallbackException:
            pass
        try:
            return debug_identity_v2_eager_fallback(
                input, tfdbg_context_id=tfdbg_context_id, op_name=op_name,
                output_slot=output_slot, tensor_debug_mode=tensor_debug_mode,
                debug_urls=debug_urls,
                circular_buffer_size=circular_buffer_size,
                tfdbg_run_id=tfdbg_run_id, name=name, ctx=ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.

    # Add nodes to the TensorFlow graph.
    # Each attr: substitute the default for None, then coerce. The order is
    # kept so that the first invalid attr is the one reported.
    tfdbg_context_id = _execute.make_str(
        "" if tfdbg_context_id is None else tfdbg_context_id,
        "tfdbg_context_id")
    op_name = _execute.make_str(
        "" if op_name is None else op_name, "op_name")
    output_slot = _execute.make_int(
        -1 if output_slot is None else output_slot, "output_slot")
    tensor_debug_mode = _execute.make_int(
        -1 if tensor_debug_mode is None else tensor_debug_mode,
        "tensor_debug_mode")
    if debug_urls is None:
        debug_urls = []
    if not isinstance(debug_urls, (list, tuple)):
        raise TypeError(
            "Expected list for 'debug_urls' argument to "
            "'debug_identity_v2' Op, not %r." % debug_urls)
    debug_urls = [_execute.make_str(url, "debug_urls") for url in debug_urls]
    circular_buffer_size = _execute.make_int(
        1000 if circular_buffer_size is None else circular_buffer_size,
        "circular_buffer_size")
    tfdbg_run_id = _execute.make_str(
        "" if tfdbg_run_id is None else tfdbg_run_id, "tfdbg_run_id")

    _, _, op, op_outputs = _op_def_library._apply_op_helper(
        "DebugIdentityV2", input=input, tfdbg_context_id=tfdbg_context_id,
        op_name=op_name, output_slot=output_slot,
        tensor_debug_mode=tensor_debug_mode, debug_urls=debug_urls,
        circular_buffer_size=circular_buffer_size,
        tfdbg_run_id=tfdbg_run_id, name=name)
    results = op_outputs[:]
    if _execute.must_record_gradient():
        attrs = (
            "T", op._get_attr_type("T"),
            "tfdbg_context_id", op.get_attr("tfdbg_context_id"),
            "op_name", op.get_attr("op_name"),
            "output_slot", op._get_attr_int("output_slot"),
            "tensor_debug_mode", op._get_attr_int("tensor_debug_mode"),
            "debug_urls", op.get_attr("debug_urls"),
            "circular_buffer_size", op._get_attr_int("circular_buffer_size"),
            "tfdbg_run_id", op.get_attr("tfdbg_run_id"),
        )
        inputs_flat = op.inputs
        _execute.record_gradient(
            "DebugIdentityV2", inputs_flat, attrs, results)
    # The op has exactly one output; unpacking enforces that.
    (identity_output,) = results
    return identity_output
def dense_to_dense_set_operation(set1, set2, set_operation,
                                 validate_indices=True, name=None):
    r"""Applies set operation along last dimension of 2 `Tensor` inputs.

    See SetOperationOp::SetOperationFromContext for values of `set_operation`.

    Output `result` is a `SparseTensor` represented by `result_indices`,
    `result_values`, and `result_shape`. For `set1` and `set2` ranked `n`,
    this has rank `n` and the same 1st `n-1` dimensions as `set1` and `set2`.
    The `nth` dimension contains the result of `set_operation` applied to the
    corresponding `[0...n-1]` dimension of `set`.

    Args:
      set1: A `Tensor`. Must be one of the following types: `int8`, `int16`,
        `int32`, `int64`, `uint8`, `uint16`, `string`.
        `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as
        `set2`. Dimension `n` contains values in a set, duplicates are
        allowed but ignored.
      set2: A `Tensor`. Must have the same type as `set1`.
        `Tensor` with rank `n`. 1st `n-1` dimensions must be the same as
        `set1`. Dimension `n` contains values in a set, duplicates are
        allowed but ignored.
      set_operation: A `string`.
      validate_indices: An optional `bool`. Defaults to `True`.
      name: A name for the operation (optional).

    Returns:
      A tuple of `Tensor` objects
      (result_indices, result_values, result_shape).

      result_indices: A `Tensor` of type `int64`.
      result_values: A `Tensor`. Has the same type as `set1`.
      result_shape: A `Tensor` of type `int64`.
    """
    ctx = _context.context()
    if ctx.executing_eagerly():
        try:
            fast_outputs = _pywrap_tensorflow.TFE_Py_FastPathExecute(
                ctx._handle, ctx.device_name, "DenseToDenseSetOperation",
                name, ctx._post_execution_callbacks, set1, set2,
                "set_operation", set_operation,
                "validate_indices", validate_indices)
            return _DenseToDenseSetOperationOutput._make(fast_outputs)
        except _core._FallbackException:
            return dense_to_dense_set_operation_eager_fallback(
                set1, set2, set_operation=set_operation,
                validate_indices=validate_indices, name=name)
        except _core._NotOkStatusException as status_error:
            message = status_error.message
            if name is not None:
                message = message + " name: " + name
            _six.raise_from(
                _core._status_to_exception(status_error.code, message), None)
    else:
        # Graph mode: coerce attrs, build the op, record the gradient.
        set_operation = _execute.make_str(set_operation, "set_operation")
        validate_indices = _execute.make_bool(
            True if validate_indices is None else validate_indices,
            "validate_indices")
        _, _, op = _op_def_lib._apply_op_helper(
            "DenseToDenseSetOperation", set1=set1, set2=set2,
            set_operation=set_operation, validate_indices=validate_indices,
            name=name)
        outputs = op.outputs[:]
        inputs_flat = op.inputs
        attrs = (
            "set_operation", op.get_attr("set_operation"),
            "validate_indices", op.get_attr("validate_indices"),
            "T", op.get_attr("T"),
        )
        _execute.record_gradient(
            "DenseToDenseSetOperation", inputs_flat, attrs, outputs, name)
        return _DenseToDenseSetOperationOutput._make(outputs)
def single_image_random_dot_stereograms_eager_fallback(
        depth_values, hidden_surface_removal=True, convergence_dots_size=8,
        dots_per_inch=72, eye_separation=2.5, mu=0.3333, normalize=True,
        normalize_max=-100, normalize_min=100, border_level=0,
        number_colors=256, output_image_shape=[1024, 768, 1],
        output_data_window=[1022, 757], name=None, ctx=None):
    r"""This is the slowpath function for Eager mode.

    This is for function single_image_random_dot_stereograms.

    Every attr argument that is None is replaced by its signature default and
    then coerced with the matching `_execute.make_*` helper before the op is
    executed.

    Args:
      depth_values: the single op input; converted through
        `_execute.args_to_matching_eager`.
      hidden_surface_removal: bool attr, default True.
      convergence_dots_size: int attr, default 8.
      dots_per_inch: int attr, default 72.
      eye_separation: float attr, default 2.5.
      mu: float attr, default 0.3333.
      normalize: bool attr, default True.
      normalize_max: float attr, default -100.
      normalize_min: float attr, default 100.
      border_level: float attr, default 0.
      number_colors: int attr, default 256.
      output_image_shape: shape attr, default [1024, 768, 1].
      output_data_window: shape attr, default [1022, 757].
      name: A name for the operation (optional).
      ctx: context to execute in; when falsy, `_context.context()` is used.

    Returns:
      The single output of the executed op.
    """
    # NOTE: the list defaults in the signature are only rebound in this body,
    # never mutated in place.
    eager_ctx = ctx or _context.context()

    hidden_surface_removal = _execute.make_bool(
        True if hidden_surface_removal is None else hidden_surface_removal,
        "hidden_surface_removal")
    convergence_dots_size = _execute.make_int(
        8 if convergence_dots_size is None else convergence_dots_size,
        "convergence_dots_size")
    dots_per_inch = _execute.make_int(
        72 if dots_per_inch is None else dots_per_inch, "dots_per_inch")
    eye_separation = _execute.make_float(
        2.5 if eye_separation is None else eye_separation, "eye_separation")
    mu = _execute.make_float(0.3333 if mu is None else mu, "mu")
    normalize = _execute.make_bool(
        True if normalize is None else normalize, "normalize")
    normalize_max = _execute.make_float(
        -100 if normalize_max is None else normalize_max, "normalize_max")
    normalize_min = _execute.make_float(
        100 if normalize_min is None else normalize_min, "normalize_min")
    border_level = _execute.make_float(
        0 if border_level is None else border_level, "border_level")
    number_colors = _execute.make_int(
        256 if number_colors is None else number_colors, "number_colors")
    output_image_shape = _execute.make_shape(
        [1024, 768, 1] if output_image_shape is None else output_image_shape,
        "output_image_shape")
    output_data_window = _execute.make_shape(
        [1022, 757] if output_data_window is None else output_data_window,
        "output_data_window")

    attr_t, (depth_values,) = _execute.args_to_matching_eager(
        [depth_values], eager_ctx)
    inputs_flat = [depth_values]
    attrs = (
        "T", attr_t,
        "hidden_surface_removal", hidden_surface_removal,
        "convergence_dots_size", convergence_dots_size,
        "dots_per_inch", dots_per_inch,
        "eye_separation", eye_separation,
        "mu", mu,
        "normalize", normalize,
        "normalize_max", normalize_max,
        "normalize_min", normalize_min,
        "border_level", border_level,
        "number_colors", number_colors,
        "output_image_shape", output_image_shape,
        "output_data_window", output_data_window,
    )
    results = _execute.execute(
        b"SingleImageRandomDotStereograms", 1, inputs=inputs_flat,
        attrs=attrs, ctx=eager_ctx, name=name)
    _execute.record_gradient(
        "SingleImageRandomDotStereograms", inputs_flat, attrs, results, name)
    # Exactly one output was requested; unpacking enforces that.
    (image,) = results
    return image
def debug_numeric_summary_v2(input, output_dtype=_dtypes.float32,
                             tensor_debug_mode=-1, tensor_id=-1, name=None):
    r"""Debug Numeric Summary V2 Op.

    Computes a numeric summary of the input tensor. The shape of the output
    depends on the tensor_debug_mode attribute.
    This op is used internally by TensorFlow Debugger (tfdbg) v2.

    Args:
      input: A `Tensor`. Input tensor, to be summarized by the op.
      output_dtype: An optional `tf.DType` from: `tf.float32, tf.float64`.
        Defaults to `tf.float32`.
        Optional. The type of the output. Can be float32 or float64
        (default: float32).
      tensor_debug_mode: An optional `int`. Defaults to `-1`.
        Tensor debug mode: the mode in which the input tensor is summarized
        by the op. See the TensorDebugMode enum in
        tensorflow/core/protobuf/debug_event.proto for details.

        Supported values:
          2 (CURT_HEALTH): Output a float32/64 tensor of shape [2]. The 1st
          element is the tensor_id, if provided, and -1 otherwise. The 2nd
          element is a bit which is set to 1 if the input tensor has an
          infinity or nan value, or zero otherwise.

          3 (CONCISE_HEALTH): Output a float32/64 tensor of shape [5]. The 1st
          element is the tensor_id, if provided, and -1 otherwise. The
          remaining four slots are the total number of elements, -infs,
          +infs, and nans in the input tensor respectively.

          4 (FULL_HEALTH): Output a float32/64 tensor of shape [11]. The 1st
          element is the tensor_id, if provided, and -1 otherwise. The 2nd
          element is the device_id, if provided, and -1 otherwise. The 3rd
          element holds the datatype value of the input tensor as according
          to the enumerated type in tensorflow/core/framework/types.proto.
          The remaining elements hold the total number of elements, -infs,
          +infs, nans, negative finite numbers, zeros, and positive finite
          numbers in the input tensor respectively.

          5 (SHAPE): Output a float32/64 tensor of shape [10]. The 1st
          element is the tensor_id, if provided, and -1 otherwise. The 2nd
          element holds the datatype value of the input tensor as according
          to the enumerated type in tensorflow/core/framework/types.proto.
          The 3rd element holds the rank of the tensor. The 4th element holds
          the number of elements within the tensor. Finally the remaining 6
          elements hold the shape of the tensor. If the rank of the tensor is
          lower than 6, the shape is right padded with zeros. If the rank is
          greater than 6, the head of the shape is truncated.

          6 (FULL_NUMERICS): Output a float32/64 tensor of shape [22]. The
          1st element is the tensor_id, if provided, and -1 otherwise. The
          2nd element is the device_id, if provided, and -1 otherwise. The
          3rd element holds the datatype value of the input tensor as
          according to the enumerated type in
          tensorflow/core/framework/types.proto. The 4th element holds the
          rank of the tensor. The 5th to 11th elements hold the shape of the
          tensor. If the rank of the tensor is lower than 6, the shape is
          right padded with zeros. If the rank is greater than 6, the head of
          the shape is truncated. The 12th to 18th elements hold the number
          of elements, -infs, +infs, nans, denormal floats, negative finite
          numbers, zeros, and positive finite numbers in the input tensor
          respectively. The final four elements hold the min value, max
          value, mean, and variance of the input tensor.

          8 (REDUCE_INF_NAN_THREE_SLOTS): Output a float32/64 tensor of shape
          [3]. The 1st element is -inf if any elements of the input tensor is
          -inf, or zero otherwise. The 2nd element is +inf if any elements of
          the input tensor is +inf, or zero otherwise. The 3rd element is nan
          if any element of the input tensor is nan, or zero otherwise.
      tensor_id: An optional `int`. Defaults to `-1`.
        Optional. An integer identifier for the tensor being summarized by
        this op.
      name: A name for the operation (optional).

    Returns:
      A `Tensor` of type `output_dtype`.
    """
    ctx = _context._context or _context.context()
    thread_data = ctx._thread_local_data
    if thread_data.is_eager:
        try:
            return pywrap_tfe.TFE_Py_FastPathExecute(
                ctx._context_handle, thread_data.device_name,
                "DebugNumericSummaryV2", name, thread_data.op_callbacks,
                input,
                "output_dtype", output_dtype,
                "tensor_debug_mode", tensor_debug_mode,
                "tensor_id", tensor_id)
        except _core._NotOkStatusException as status_error:
            _ops.raise_from_not_ok_status(status_error, name)
        except _core._FallbackException:
            pass
        try:
            return debug_numeric_summary_v2_eager_fallback(
                input, output_dtype=output_dtype,
                tensor_debug_mode=tensor_debug_mode, tensor_id=tensor_id,
                name=name, ctx=ctx)
        except _core._SymbolicException:
            pass  # Add nodes to the TensorFlow graph.

    # Add nodes to the TensorFlow graph.
    # Each attr: substitute the default for None, then coerce.
    output_dtype = _execute.make_type(
        _dtypes.float32 if output_dtype is None else output_dtype,
        "output_dtype")
    tensor_debug_mode = _execute.make_int(
        -1 if tensor_debug_mode is None else tensor_debug_mode,
        "tensor_debug_mode")
    tensor_id = _execute.make_int(
        -1 if tensor_id is None else tensor_id, "tensor_id")

    _, _, op, op_outputs = _op_def_library._apply_op_helper(
        "DebugNumericSummaryV2", input=input, output_dtype=output_dtype,
        tensor_debug_mode=tensor_debug_mode, tensor_id=tensor_id, name=name)
    results = op_outputs[:]
    if _execute.must_record_gradient():
        attrs = (
            "output_dtype", op._get_attr_type("output_dtype"),
            "T", op._get_attr_type("T"),
            "tensor_debug_mode", op._get_attr_int("tensor_debug_mode"),
            "tensor_id", op._get_attr_int("tensor_id"),
        )
        inputs_flat = op.inputs
        _execute.record_gradient(
            "DebugNumericSummaryV2", inputs_flat, attrs, results)
    # The op has exactly one output; unpacking enforces that.
    (summary,) = results
    return summary
def __init__(self, dataset):
  """Sets up an eager-mode iterator over the elements of `dataset`.

  For example:

  ```python
  dataset = tf.data.Dataset.range(4)
  for x in Iterator(dataset):
    print(x)
  ```

  Tensors produced will be placed on the device on which this iterator object
  was created. The iterator resource itself is always created under
  `/device:CPU:0`; when the device active at construction time has a device
  type other than "CPU", a function buffering resource is also created on
  that device, with the CPU as its target device.

  Args:
    dataset: A `tf.data.Dataset` object.

  Raises:
    RuntimeError: When invoked without eager execution enabled.
  """
  if not context.in_eager_mode():
    message = (
        "{} objects can only be used when eager execution is enabled, use "
        "tf.data.Dataset.make_iterator or "
        "tf.data.Dataset.make_one_shot_iterator for graph construction")
    raise RuntimeError(message.format(type(self)))

  cpu_device = "/device:CPU:0"

  # The dataset variant and the iterator resource are created under the CPU
  # device scope, independent of the caller's current device.
  with ops.device(cpu_device):
    variant_tensor = dataset._as_variant_tensor()  # pylint: disable=protected-access
    self._output_types = dataset.output_types
    self._output_shapes = dataset.output_shapes
    self._flat_output_types = nest.flatten(dataset.output_types)
    self._flat_output_shapes = nest.flatten(dataset.output_shapes)
    self._resource = gen_dataset_ops.iterator(
        container="",
        shared_name=_generate_shared_name("eager_iterator"),
        output_types=self._flat_output_types,
        output_shapes=self._flat_output_shapes)
    gen_dataset_ops.make_iterator(variant_tensor, self._resource)
    # Tie the lifetime of the iterator resource to this Python object.
    self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
        handle=self._resource, handle_device=cpu_device)

  # Read the caller's device only after leaving the CPU scope above, so the
  # value reflects where this object is being constructed.
  eager_context = context.context()
  self._device = eager_context.device_name
  self._buffer_resource_handle = None

  device_type = eager_context.device_spec.device_type
  if not device_type or device_type == "CPU":
    # No device type set, or already on the CPU: no buffering resource needed.
    return

  with ops.device(cpu_device):
    string_handle = gen_dataset_ops.iterator_to_string_handle(self._resource)

    @function.Defun(dtypes.string)
    def remote_fn(h):
      # Rebuild the iterator from its string handle and fetch one element.
      return iterator_ops.Iterator.from_string_handle(
          h, self._output_types, self._output_shapes).get_next()

    remote_fn.add_to_graph(None)
    cpu_target = constant_op.constant(cpu_device)

  with ops.device(self._device):
    self._buffer_resource_handle = prefetching_ops.function_buffering_resource(
        string_arg=string_handle,
        f=remote_fn,
        target_device=cpu_target,
        buffer_size=10,
        thread_pool_size=1,
        container="",
        shared_name=_generate_shared_name("function_buffer_resource"))
    # Tie the lifetime of the buffering resource to this Python object.
    self._buffer_resource_deleter = resource_variable_ops.EagerResourceDeleter(
        handle=self._buffer_resource_handle, handle_device=self._device)