def set_weights(distribution_strategy, dist_model, weights):
  """Copies a flat list of weights onto the layers of a replicated model.

  `weights` is consumed front to back: each layer in `dist_model.layers`
  takes as many leading entries as it has variables in `layer.weights`.
  When executing eagerly outside functions every `assign` is issued directly.
  Otherwise the assign results are passed through
  `distribution_strategy.unwrap`, collected, and run together in the session
  returned by `K.get_session`.

  Args:
    distribution_strategy: DistributionStrategy used to distribute training
      and validation.
    dist_model: The replicated models on the different devices.
    weights: The weights of the original model.
  """
  pending_assigns = []
  remaining = weights
  for layer in dist_model.layers:
    count = len(layer.weights)
    for replica_var, value in zip(layer.weights, remaining[:count]):
      if ops.executing_eagerly_outside_functions():
        replica_var.assign(value)
        continue
      pending_assigns.append(
          distribution_strategy.unwrap(replica_var.assign(value)))
    # Drop the entries this layer consumed before moving to the next layer.
    remaining = remaining[count:]

  if ops.executing_eagerly_outside_functions():
    return
  K.get_session(pending_assigns).run(pending_assigns)
def canonicalize(d, default=None):
  """Canonicalize device string.

  Components missing from `d` are taken from `default` when given, and
  otherwise from '/replica:0/task:0/device:CPU:0'.

  For example:
    If d = '/cpu:0', default='/job:worker/task:1', it returns
      '/job:worker/replica:0/task:1/device:CPU:0'.
    If d = '/cpu:0', default='/job:worker', it returns
      '/job:worker/replica:0/task:0/device:CPU:0'.
    If d = '/gpu:0', default=None, it returns
      '/replica:0/task:0/device:GPU:0'.

  Note: This uses "job:localhost" as the default if executing eagerly.

  Args:
    d: a device string.
    default: a string for default device if d doesn't have all components.

  Returns:
    a canonicalized device string.
  """
  requested = tf_device.DeviceSpec.from_string(d)
  kind = requested.device_type
  assert kind is None or kind == kind.upper(), (
      "Device type '%s' must be all-caps." % (kind,))

  # Start from the built-in defaults; later merges override them.
  canonical = tf_device.DeviceSpec(
      replica=0, task=0, device_type="CPU", device_index=0)
  if ops.executing_eagerly_outside_functions():
    canonical.job = "localhost"

  # Merge order matters: caller-supplied defaults first, then `d` on top.
  overrides = []
  if default:
    overrides.append(tf_device.DeviceSpec.from_string(default))
  overrides.append(requested)
  for override in overrides:
    canonical.merge_from(override)
  return canonical.to_string()
def __init__(self, fn_graph, variable_holder, attrs=None, signature=None):
  """Initializes the wrapped function and records its variable holder.

  Args:
    fn_graph: Forwarded positionally to the parent class constructor.
    variable_holder: Object stored on `self._variable_holder`.
    attrs: Optional; forwarded to the parent constructor as `attrs`.
    signature: Optional; forwarded to the parent constructor as `signature`.
  """
  super(WrappedFunction, self).__init__(
      fn_graph, attrs=attrs, signature=signature)
  self._variable_holder = variable_holder
  # Variables are only lifted when executing eagerly outside functions.
  if ops.executing_eagerly_outside_functions():
    # TODO(allenl): Make this work in 1.x?
    self._lift_unlifted_variables()
def apply_gradients(self, grads_and_vars, name=None):
  """Apply gradients to variables.

  This is the second part of `minimize()`. The pairs are first passed
  through `_filter_grads`; under a distribution strategy the gradients are
  additionally replaced by the result of `merge_grads`. One update is then
  built per (gradient, variable) pair and the collection is handed to
  `merge_update_step` together with `self.iterations`.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`.
    name: Optional name for the returned operation. Default to the name
      passed to the `Optimizer` constructor.

  Returns:
    The value returned by `merge_update_step(update_ops, self.iterations)`.

  Raises:
    NotImplementedError: If a "variable" in a pair is an `ops.Tensor`.
    RuntimeError: If a sparse (`IndexedSlices`) gradient targets a variable
      that has a constraint.
  """
  grads_and_vars = _filter_grads(grads_and_vars)
  var_list = [pair[1] for pair in grads_and_vars]
  if distribution_strategy_context.has_distribution_strategy():
    grads_and_vars = zip(merge_grads(grads_and_vars), var_list)

  with ops.init_scope():
    self._prepare()
    self._create_slots(var_list)

  def _apply_single_update(grad, var):
    """Builds the update for one gradient/variable pair."""
    if isinstance(var, ops.Tensor):
      raise NotImplementedError("Trying to update a Tensor ", var)
    if not isinstance(grad, ops.IndexedSlices):
      dense_update = self._resource_apply_dense(grad, var)
      if var.constraint is None:
        return dense_update
      # Re-project onto the constraint only after the dense update ran.
      with ops.control_dependencies([dense_update]):
        return var.assign(var.constraint(var))
    if var.constraint is not None:
      raise RuntimeError(
          "Cannot use a constraint function on a sparse variable.")
    return self._resource_apply_sparse_duplicate_indices(
        grad.values, var, grad.indices)

  update_ops = []
  with ops.name_scope(name, self._name) as name:
    for grad, var in grads_and_vars:
      if ops.executing_eagerly_outside_functions():
        suffix = ""
      else:
        suffix = "_" + var.op.name
      with ops.name_scope("update" + suffix):
        update_ops.append(_apply_single_update(grad, var))
    # control dependencies does not work in per replica mode, please change
    # this once b/118841692 is fixed.
    # with ops.control_dependencies(update_ops):
    #   apply_updates = self._iterations.assign_add(1).op
    return merge_update_step(update_ops, self.iterations)
def call(self, inputs, mask=None, training=None, initial_state=None):
  """Runs the LSTM over `inputs` and returns its output.

  Args:
    inputs: The input tensor, or a list whose first element is the input
      tensor and whose remaining elements are the initial states.
    mask: Accepted for interface compatibility; not read by this method.
    training: Accepted for interface compatibility; not read by this method.
    initial_state: Optional initial states. When omitted (and `inputs` is
      not a list) `self.states` is used for stateful layers, otherwise
      `self.get_initial_state(inputs)`.

  Returns:
    `[output] + [new_h, new_c]` when `self.return_state` is set, otherwise
    the tuple `(output, runtime)`.

  Raises:
    ValueError: If the number of initial states differs from
      `len(self.states)`.
  """
  if isinstance(inputs, list):
    inputs, initial_state = inputs[0], inputs[1:]
  elif initial_state is None:
    if self.stateful:
      initial_state = self.states
    else:
      initial_state = self.get_initial_state(inputs)

  if len(initial_state) != len(self.states):
    raise ValueError('Layer has ' + str(len(self.states)) +
                     ' states but was passed ' + str(len(initial_state)) +
                     ' initial states.')

  if self.go_backwards:
    # Reverse time axis.
    inputs = K.reverse(inputs, 1)

  # Arguments shared by both LSTM implementations.
  shared_args = (inputs, initial_state[0], initial_state[1], self.kernel,
                 self.recurrent_kernel, self.bias, self.units)
  eager_outside = ops.executing_eagerly_outside_functions()
  if eager_outside and context.num_gpus() > 0:
    outputs, [new_h, new_c], runtime = cudnn_lstm(*shared_args)
  else:
    outputs, [new_h, new_c], runtime = normal_lstm(
        *(shared_args + (self.activation, self.recurrent_activation)))
    if not eager_outside:
      # In graph mode the cuDNN variant is registered after the normal call.
      function.register(cudnn_lstm, *shared_args)

  states = [new_h, new_c]
  if self.stateful:
    updates = [
        state_ops.assign(self.states[idx], new_state)
        for idx, new_state in enumerate(states)
    ]
    self.add_update(updates, inputs)

  output = outputs if self.return_sequences else outputs[:, -1, :]
  if self.return_state:
    return [output] + states
  return output, runtime
def __enter__(self):
  """Records the default graph and its operation count, then returns self.

  Nothing is recorded when executing eagerly, or when not executing eagerly
  outside functions; `self` is returned unchanged in those cases.
  """
  # Only run in V2 Function mode.
  if (not context.executing_eagerly() and
      ops.executing_eagerly_outside_functions()):
    self._graph = ops.get_default_graph()
    self._num_operations = len(self._graph.get_operations())
  return self
def current():
  """Return a string (not canonicalized) for the current device."""
  # TODO(josh11b): Work out how this function interacts with ops.colocate_with.
  if ops.executing_eagerly_outside_functions():
    return context.context().device_name

  # Let the default graph's device functions fill in a stand-in op's device.
  probe_op = _FakeOperation()
  ops.get_default_graph()._apply_device_functions(probe_op)  # pylint: disable=protected-access
  return probe_op.device
def _create_keras_history_helper(tensors, processed_ops, created_layers):
  """Helper method for `create_keras_history`.

  Walks the flattened `tensors`; for every tensor without `_keras_history`
  whose op has not been processed yet, it recurses into the op's
  Keras-derived inputs and then wraps the op in a `TensorFlowOpLayer`.

  Arguments:
    tensors: A structure of Tensors for which to create Keras metadata.
    processed_ops: Set. TensorFlow operations that have already been wrapped
      in `TensorFlowOpLayer` instances.
    created_layers: List. The `TensorFlowOpLayer` instances created.

  Returns:
    Tuple. First element is the updated set of TensorFlow Operations that
    have been wrapped in `TensorFlowOpLayer` instances. Second element is
    a list of the `TensorFlowOpLayer` instances created.
  """
  # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
  # Cannot be imported at top because of circular dependencies.
  # TODO(omalleyt): Resolve circular dependency.
  from tensorflow.python.keras.engine import base_layer  # pylint: disable=g-import-not-at-top

  for tensor in nest.flatten(tensors):
    if getattr(tensor, '_keras_history', None) is not None:
      continue
    op = tensor.op  # The Op that created this Tensor.
    if op in processed_ops:
      continue

    # Recursively set `_keras_history`.
    constants = {}
    layer_inputs = []
    for position, op_input in enumerate(list(op.inputs)):
      if uses_keras_history(op_input):
        layer_inputs.append(op_input)
        continue
      # Treat any value not originating from a `keras.Input` as
      # a constant. Variables cannot be supported.
      legacy_graph_with_strategy = (
          distribution_strategy_context.in_cross_replica_context() and
          not ops.executing_eagerly_outside_functions())
      if legacy_graph_with_strategy:
        # In Legacy Graph mode, evaluating here makes Session be
        # configured improperly.
        constants[position] = op_input
      else:
        constants[position] = backend.function([], op_input)([])

    processed_ops, created_layers = _create_keras_history_helper(
        layer_inputs, processed_ops, created_layers)
    layer_name = op.name
    serialized_node = op.node_def.SerializeToString()
    wrapper = base_layer.TensorFlowOpLayer(
        serialized_node, constants=constants, name=layer_name)
    created_layers.append(wrapper)
    wrapper._add_inbound_node(  # pylint: disable=protected-access
        layer_inputs, op.outputs)
    processed_ops.update([op])
  return processed_ops, created_layers
def _distributed_apply(self, distribution, grads_and_vars, name):
  """`apply_gradients` using a `DistributionStrategy`.

  Gradients are sum-reduced with `distribution.extended.batch_reduce_to`,
  each variable is updated through `distribution.extended.update`, and
  `self._iterations` is incremented afterwards.

  Args:
    distribution: The strategy object whose `extended` API is used.
    grads_and_vars: (gradient, variable) pairs.
    name: Name scope for the created ops; `self._name` is used when falsy.

  Returns:
    The `.op` of the iteration increment when not executing eagerly or when
    any update is symbolic; otherwise the result of
    `self._iterations.assign_add(1)`.
  """
  summed_grads = distribution.extended.batch_reduce_to(
      ds_reduce_util.ReduceOp.SUM, grads_and_vars)
  variables = [pair[1] for pair in grads_and_vars]
  grads_and_vars = zip(summed_grads, variables)

  def _update_one_variable(var, grad):
    """Builds the update for one variable from its reduced gradient."""
    if isinstance(var, ops.Tensor):
      raise NotImplementedError("Trying to update a Tensor ", var)
    if not isinstance(grad, ops.IndexedSlices):
      dense_update = self._resource_apply_dense(grad, var)
      if var.constraint is None:
        return dense_update
      # Re-project onto the constraint only after the dense update ran.
      with ops.control_dependencies([dense_update]):
        return var.assign(var.constraint(var))
    if var.constraint is not None:
      raise RuntimeError(
          "Cannot use a constraint function on a sparse variable.")
    return self._resource_apply_sparse_duplicate_indices(
        grad.values, var, grad.indices)

  update_ops = []
  with backend.name_scope(name or self._name):
    for grad, var in grads_and_vars:
      if ops.executing_eagerly_outside_functions():
        suffix = ""
      else:
        suffix = "_" + var.op.name
      with backend.name_scope("update" + suffix):
        update_ops.extend(
            distribution.extended.update(
                var, _update_one_variable, args=(grad,), group=False))

    has_symbolic_update = any(
        isinstance(item, ops.Operation) or tf_utils.is_symbolic_tensor(item)
        for item in update_ops)
    if context.executing_eagerly() and not has_symbolic_update:
      # Eager updates execute immediately, so no ordering is needed.
      return self._iterations.assign_add(1)

    # If the current context is graph mode or any of the update ops are
    # symbolic then the step update should be carried out under a graph
    # context.
    with ops._get_graph_from_inputs(update_ops).as_default(), \
        ops.control_dependencies(update_ops):  # pylint: disable=protected-access
      return self._iterations.assign_add(1).op
def restore_variables(self, wrapped, saver):
  """Restores variables from the checkpoint.

  Does nothing when `saver` is None. Otherwise prunes `wrapped` down to the
  saver's restore op, calls it with `self._variables_path`, and, when not
  executing eagerly outside functions, installs the resulting initializer
  on every variable in the graph's GLOBAL_VARIABLES collection.

  Args:
    wrapped: Object exposing `graph` and `prune(feeds=..., fetches=...)`.
    saver: Object exposing `saver_def`, or None.
  """
  if saver is None:
    return

  saver_def = saver.saver_def
  filename_tensor = wrapped.graph.as_graph_element(
      saver_def.filename_tensor_name)
  restore_op = wrapped.graph.as_graph_element(saver_def.restore_op_name)
  # We both feed and fetch filename_tensor so we have an operation to use to
  # feed into variable initializers (only relevant for v1 graph building).
  restore_fn = wrapped.prune(
      feeds=[filename_tensor], fetches=[filename_tensor, restore_op])
  initializer, _ = restore_fn(constant_op.constant(self._variables_path))

  if ops.executing_eagerly_outside_functions():
    return
  global_variables = wrapped.graph.get_collection_ref(
      ops.GraphKeys.GLOBAL_VARIABLES)
  for variable in global_variables:
    variable._initializer_op = initializer  # pylint: disable=protected-access
def __del__(self):
  """Logs a report if the object is destroyed without having been used.

  Nothing is logged when executing eagerly outside functions or when
  `self._sated` is set. The report goes to `tf_logging.fatal` when
  `self._fatal_error_if_unsated` is set and to `tf_logging.error` otherwise.
  """
  if ops.executing_eagerly_outside_functions() or self._sated:
    return

  report = (
      tf_logging.fatal if self._fatal_error_if_unsated else tf_logging.error)
  frames = traceback.format_stack(self._stack_frame)
  creation_stack = ''.join(frame_text.rstrip() for frame_text in frames)
  message = (
      '==================================\n'
      'Object was never used (type %s):\n%s\nIf you want to mark it as '
      'used call its "mark_used()" method.\nIt was originally created '
      'here:\n%s\n'
      '==================================' %
      (self._type, self._repr, creation_stack))
  report(message)
def _distributed_apply(self, distribution, grads_and_vars, name):
  """`apply_gradients` using a `DistributionStrategy`.

  Gradients are sum-reduced with `distribution.extended.batch_reduce_to`,
  each variable is updated through `distribution.extended.update`, and
  `self._iterations` is incremented under control dependencies on all
  updates.

  Args:
    distribution: The strategy object whose `extended` API is used.
    grads_and_vars: (gradient, variable) pairs.
    name: Name passed to `ops.name_scope`, with `self._name` as default.

  Returns:
    The result of `self._iterations.assign_add(1)` when executing eagerly,
    otherwise its `.op`.
  """
  summed_grads = distribution.extended.batch_reduce_to(
      ds_reduce_util.ReduceOp.SUM, grads_and_vars)
  variables = [pair[1] for pair in grads_and_vars]
  grads_and_vars = zip(summed_grads, variables)

  def _update_one_variable(var, grad):
    """Builds the update for one variable from its reduced gradient."""
    if isinstance(var, ops.Tensor):
      raise NotImplementedError("Trying to update a Tensor ", var)
    if not isinstance(grad, ops.IndexedSlices):
      dense_update = self._resource_apply_dense(grad, var)
      if var.constraint is None:
        return dense_update
      # Re-project onto the constraint only after the dense update ran.
      with ops.control_dependencies([dense_update]):
        return var.assign(var.constraint(var))
    if var.constraint is not None:
      raise RuntimeError(
          "Cannot use a constraint function on a sparse variable.")
    return self._resource_apply_sparse_duplicate_indices(
        grad.values, var, grad.indices)

  update_ops = []
  with ops.name_scope(name, self._name) as name:
    for grad, var in grads_and_vars:
      if ops.executing_eagerly_outside_functions():
        suffix = ""
      else:
        suffix = "_" + var.op.name
      with ops.name_scope("update" + suffix):
        update_ops.extend(
            distribution.extended.update(
                var, _update_one_variable, args=(grad,), group=False))

    with ops.control_dependencies(update_ops):
      step_increment = self._iterations.assign_add(1)
    if context.executing_eagerly():
      return step_increment
    return step_increment.op
def get_slot(self, var, name):
  """Return a slot named `name` created for `var` by the Optimizer.

  Some `Optimizer` subclasses use additional variables. For example
  `Momentum` and `Adagrad` use variables to accumulate updates. This method
  gives access to these `Variable` objects if for some reason you need them.

  Use `get_slot_names()` to get the list of slot names created by the
  `Optimizer`.

  Args:
    var: A variable passed to `minimize()` or `apply_gradients()`.
    name: A string.

  Returns:
    The `Variable` for the slot if it was created, `None` otherwise.
  """
  # pylint: disable=protected-access
  slots_for_name = self._slots.get(name, None)
  if not slots_for_name:
    return None

  if not hasattr(var, "_distributed_container"):
    return slots_for_name.get(_var_key(var), None)

  # NOTE: If this isn't patched, then there is no `handle` in
  # `_resource_apply_dense`.
  container = var._distributed_container()
  assert container is not None
  if ops.executing_eagerly_outside_functions():
    slot_key = container._unique_id
  else:
    slot_key = (container.graph, container._shared_name)
  # pylint: enable=protected-access
  mirrored_slot = slots_for_name.get(slot_key, None)
  if mirrored_slot is None:
    return None
  # Pick the slot component living on the same device as `var`.
  return mirrored_slot.get(device=var.device)
def shutdown_tpu_system(cluster_resolver=None):
  """Shuts down the TPU devices.

  This will clear all caches, even those that are maintained through
  sequential calls to tf.tpu.experimental.initialize_tpu_system, such as the
  compilation cache.

  Three execution modes are handled:
    * Eager: the shutdown op is wrapped in a `defun` and run on the TPU
      system device, then the eager context caches are cleared.
    * Legacy graph mode: a fresh graph and session are created against the
      resolver's master and the shutdown op is run there.
    * Inside a `tf.function`: not supported; raises `RuntimeError`.

  Args:
    cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver,
      which provides information about the TPU cluster. When None, a
      `TPUClusterResolver("")` is created.

  Raises:
    RuntimeError: If run in a tf.function.
  """
  job = None
  if cluster_resolver is None:
    # If no cluster resolver is specified, and running eagerly, execute the
    # shutdown ops in the current device scope.
    if context.executing_eagerly():
      curr_device = device.DeviceSpec.from_string(context.context().device_name)
      if curr_device.job is not None:
        job = "{}/replica:0/task:0".format(curr_device.job)

    cluster_resolver = TPUClusterResolver("")
  assert isinstance(cluster_resolver, TPUClusterResolver)

  tpu_name = compat.as_text(cluster_resolver._tpu)  # pylint: disable=protected-access
  if tpu_name not in _INITIALIZED_TPU_SYSTEMS:
    # Pass `tpu_name` as a logging argument so the `%s` placeholder is
    # actually filled in.
    logging.warning("You are shutting down a TPU system %s that has not been "
                    "initialized.", tpu_name)

  logging.info("Shutting down the TPU system: %s", tpu_name)

  if context.executing_eagerly():
    # This function looks as it is for the following non-intuitive reasons.
    # tpu.shutdown_system creates a dummy op whose sole purpose is to trigger
    # DistributedTPURewritePass. This pass actually adds real ops that
    # shutdown the TPU system. Thus, we can't simply run tpu.shutdown_system
    # eagerly. We need to wrap it in defun and trigger the rewrite passes on it.
    if tpu_name not in _LOCAL_MASTERS:
      # Explicitly place the tpu.shutdown_system in the first worker to
      # avoid the output node match multiple devices error.
      job = "{}/replica:0/task:0".format(cluster_resolver.get_job_name())

    @function.defun
    def _tpu_shutdown_fn():
      tpu.shutdown_system(job=job)

    # The TPU_SYSTEM device must match the device used in tpu.shutdown_system
    # exactly, otherwise you can get errors if there are multiple TPU_SYSTEM
    # devices available.
    with ops.device(tpu._tpu_system_device_name(job)):  # pylint: disable=protected-access
      _tpu_shutdown_fn()

    # Clear out the eager context caches since the memory is invalid now.
    logging.info("Clearing out eager caches")
    context.context()._clear_caches()  # pylint: disable=protected-access
  elif not ops.executing_eagerly_outside_functions():
    master = cluster_resolver.master()
    cluster_spec = cluster_resolver.cluster_spec()

    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
    if cluster_spec:
      session_config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())

    with ops.Graph().as_default():
      with session_lib.Session(config=session_config, target=master) as sess:
        sess.run(tpu.shutdown_system())
  else:
    # Graph mode nested under an eager outer context, i.e. a tf.function.
    raise RuntimeError("shutdown_tpu_system is not supported within "
                       "tf.functions.")

  logging.info("Finished shutting down TPU system.")
  if tpu_name in _INITIALIZED_TPU_SYSTEMS:
    del _INITIALIZED_TPU_SYSTEMS[tpu_name]
def callback(self, op_type, inputs, attrs, outputs, op_name=None, graph=None):
  """Op callback for tracing (dumping) a TF program's execution.

  When `graph` is truthy the op is being created symbolically: a
  `GraphOpCreation` record is written and, unless the op type is in
  `OP_CALLBACK_SKIP_OPS` or there are no outputs, the outputs are
  instrumented. Otherwise the op is treated as an eager execution and an
  execution record is written.

  Args:
    op_type: Type of the op (also used as the function name for the
      graph-id lookup in the eager branch).
    inputs: Input tensors of the op.
    attrs: Unused.
    outputs: Output tensors of the op; may be empty.
    op_name: Name of the op; rewritten for placeholder ops in graph mode.
    graph: The graph the op is created in, or a falsy value for the eager
      branch.

  Returns:
    In the graph branch, the result of `_instrument_symbolic_tensors` when
    instrumentation applies; otherwise None (implicitly or explicitly).
  """
  del attrs  # Unused

  writer = self.get_writer()
  if graph:
    is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
    context_id = self._get_context_id(graph)  # Innermost context ID.
    output_tensor_ids = self._get_symbolic_tensor_ids(len(outputs))
    if op_type in ("Placeholder", "PlaceholderWithDefault"):
      # In some cases, the op name of a Placeholder op in a graph
      # can be duplicate (e.g., with the name "resource").
      # When this happens, we give the op an debugger-generated name
      # in order to prevent problems and check failures down the pipe.
      op_name = "%s_%d" % (op_name, self._symbolic_tensor_counter)
    if is_v1_graph_mode:
      for input_tensor in inputs:
        # TODO(cais):
        # Make the new op run after the debug tensor recorded for any
        # placeholder input it consumes.
        if input_tensor in self._placeholder_to_debug_tensor and outputs:
          outputs[0].op._add_control_input(  # pylint: disable=protected-access
              self._placeholder_to_debug_tensor[input_tensor].op)
    graph_op_creation = debug_event_pb2.GraphOpCreation(
        op_type=op_type,
        op_name=op_name,
        graph_name=graph.name if hasattr(graph, "name") else None,
        graph_id=context_id,
        input_names=[input_tensor.name for input_tensor in inputs],
        num_outputs=len(outputs),
        output_tensor_ids=output_tensor_ids,
        code_location=self._process_stack_frames())
    writer.WriteGraphOpCreation(graph_op_creation)
    if outputs and compat.as_bytes(
        op_type) not in op_callbacks_common.OP_CALLBACK_SKIP_OPS:
      return self._instrument_symbolic_tensors(
          outputs, op_type, op_name, context_id, output_tensor_ids)
  else:
    op_type_bytes = compat.as_bytes(op_type)
    if op_type_bytes == b"DebugNumericSummaryV2":
      # TODO(b/140334369): Remove this special casing logic once op_callback.
      # automatically prevents infinite recursion in eager mode.
      return None
    if op_type_bytes in op_callbacks_common.OP_CALLBACK_SKIP_OPS:
      return None
    context_id = self._func_graph_id_from_func_name(op_type)
    input_ids = [t._id for t in inputs]  # pylint:disable=protected-access
    # One registered device id per output; empty when there are no outputs.
    output_tensor_device_ids = [
        writer.RegisterDeviceAndGetId(output.device) for output in outputs
    ] if outputs else []
    writer.WriteExecution(
        self._dump_eager_tensors(outputs, op_type, input_ids,
                                 output_tensor_device_ids,
                                 graph_id=context_id))
def apply_gradients(self, grads_and_vars, name=None):
  """Apply gradients to variables.

  This is the second part of `minimize()`. The pairs are first passed
  through `_filter_grads`; under a distribution strategy the gradients are
  additionally replaced by the result of `merge_grads`. One update is then
  built per (gradient, variable) pair and the collection is handed to
  `merge_update_step` together with `self.iterations`.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`.
    name: Optional name for the returned operation. Default to the name
      passed to the `Optimizer` constructor.

  Returns:
    The value returned by `merge_update_step(update_ops, self.iterations)`.

  Raises:
    NotImplementedError: If a "variable" in a pair is an `ops.Tensor`.
    RuntimeError: If a sparse (`IndexedSlices`) gradient targets a variable
      that has a constraint.
  """
  grads_and_vars = _filter_grads(grads_and_vars)
  var_list = [pair[1] for pair in grads_and_vars]
  if distribute_ctx.has_distribution_strategy():
    grads_and_vars = zip(merge_grads(grads_and_vars), var_list)

  with ops.init_scope():
    self._prepare()
    self._create_slots(var_list)

  def _apply_single_update(grad, var):
    """Builds the update for one gradient/variable pair."""
    if isinstance(var, ops.Tensor):
      raise NotImplementedError("Trying to update a Tensor ", var)
    if not isinstance(grad, ops.IndexedSlices):
      dense_update = self._resource_apply_dense(grad, var)
      if var.constraint is None:
        return dense_update
      # Re-project onto the constraint only after the dense update ran.
      with ops.control_dependencies([dense_update]):
        return var.assign(var.constraint(var))
    if var.constraint is not None:
      raise RuntimeError(
          "Cannot use a constraint function on a sparse variable."
      )
    return self._resource_apply_sparse_duplicate_indices(
        grad.values, var, grad.indices)

  update_ops = []
  with ops.name_scope(name, self._name) as name:
    for grad, var in grads_and_vars:
      if ops.executing_eagerly_outside_functions():
        suffix = ""
      else:
        suffix = "_" + var.op.name
      with ops.name_scope("update" + suffix):
        update_ops.append(_apply_single_update(grad, var))
    # control dependencies does not work in per replica mode, please change
    # this once b/118841692 is fixed.
    # with ops.control_dependencies(update_ops):
    #   apply_updates = self._iterations.assign_add(1).op
    return merge_update_step(update_ops, self.iterations)
def _create_keras_history_helper(tensors, processed_ops, created_layers):
  """Helper method for `create_keras_history`.

  Walks the flattened `tensors`. Tensors that already carry
  `_keras_history` are skipped; sparse and ragged tensors are collected and
  reported in a single `ValueError` after the walk. Every other tensor whose
  op has not been processed is wrapped in a `TensorFlowOpLayer`, after
  recursing into the op's Keras-derived inputs.

  Args:
    tensors: A structure of Tensors for which to create Keras metadata.
    processed_ops: Set. TensorFlow operations that have already been wrapped
      in `TensorFlowOpLayer` instances.
    created_layers: List. The `TensorFlowOpLayer` instances created.

  Returns:
    Tuple. First element is the updated set of TensorFlow Operations that
    have been wrapped in `TensorFlowOpLayer` instances. Second element is
    a list of the `TensorFlowOpLayer` instances created.

  Raises:
    ValueError: If any sparse or ragged tensor was encountered.
  """
  # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
  # Cannot be imported at top because of circular dependencies.
  # TODO(omalleyt): Resolve circular dependency.
  from tensorflow.python.keras.engine import base_layer  # pylint: disable=g-import-not-at-top
  tensor_list = nest.flatten(tensors)
  sparse_ops = []
  ragged_tensors = []
  for tensor in tensor_list:
    if getattr(tensor, '_keras_history', None) is not None:
      continue
    if isinstance(
        tensor, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
      sparse_ops.append(tensor.op)
      continue
    if tf_utils.is_ragged(tensor):
      # Ragged tensors don't have an op property
      ragged_tensors.append(tensor)
      continue
    op = tensor.op  # The Op that created this Tensor.
    if op not in processed_ops:
      # Recursively set `_keras_history`.
      op_inputs = list(op.inputs)
      constants = {}
      layer_inputs = []
      for i, op_input in enumerate(op_inputs):
        if uses_keras_history(op_input):
          layer_inputs.append(op_input)
        else:
          # Treat any value not originating from a `keras.Input` as
          # a constant. Variables cannot be supported.
          ds_with_session = (
              distribution_strategy_context.in_cross_replica_context(
              ) and not ops.executing_eagerly_outside_functions())
          using_xla = control_flow_util.GraphOrParentsInXlaContext(
              ops.get_default_graph())
          if ds_with_session or using_xla or _UNSAFE_GRAPH_OP_LAYER_CREATION:
            # In Legacy Graph mode, evaluating here makes Session be
            # configured improperly. The downside of this is that saving
            # via `get_config` breaks, but SavedModel still works.
            constants[i] = op_input
          else:
            # Evaluate the constant outside any function-building scope.
            with ops.init_scope():
              if ops.executing_eagerly_outside_functions():
                constants[
                    i] = backend.eval_in_eager_or_function(
                        op_input)
              else:
                constants[i] = backend.function([], op_input)([])
      layer_inputs = unnest_if_single_tensor(layer_inputs)
      processed_ops, created_layers = _create_keras_history_helper(
          layer_inputs, processed_ops, created_layers)
      name = op.name
      node_def = op.node_def.SerializeToString()
      op_layer = base_layer.TensorFlowOpLayer(node_def,
                                              constants=constants,
                                              name=name)
      created_layers.append(op_layer)
      op_layer._set_connectivity_metadata(  # pylint: disable=protected-access
          args=(layer_inputs, ),
          kwargs={},
          outputs=op.outputs)
      processed_ops.update([op])
  # Unsupported outputs are reported together, after all tensors were visited.
  if sparse_ops or ragged_tensors:
    lambda_example = """ weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights) output = tf.keras.layers.Lambda(weights_mult)(input) """
    raise ValueError(
        'Tensorflow ops that generate ragged or sparse tensor '
        'outputs are currently not supported by Keras automatic '
        'op wrapping. Please wrap these ops in a Lambda layer: '
        '\n\n```\n{example}\n```\n'
        'Sparse ops encountered: {sparse_ops}\n'
        'Ragged tensors encountered: {ragged_tensors}\n'.format(
            example=lambda_example,
            sparse_ops=str(sparse_ops),
            ragged_tensors=str(ragged_tensors)))
  return processed_ops, created_layers
def _create_keras_history_helper(tensors, processed_ops, created_layers):
  """Helper method for `create_keras_history`.

  Walks the flattened `tensors`; for every tensor without `_keras_history`
  whose op has not been processed yet, it recurses into the op's
  Keras-derived inputs and then wraps the op in a `TensorFlowOpLayer`.
  Ops whose type starts with 'Sparse' are rejected.

  Arguments:
    tensors: A structure of Tensors for which to create Keras metadata.
    processed_ops: Set. TensorFlow operations that have already been wrapped
      in `TensorFlowOpLayer` instances.
    created_layers: List. The `TensorFlowOpLayer` instances created.

  Returns:
    Tuple. First element is the updated set of TensorFlow Operations that
    have been wrapped in `TensorFlowOpLayer` instances. Second element is
    a list of the `TensorFlowOpLayer` instances created.

  Raises:
    ValueError: If an unprocessed op has a type starting with 'Sparse'.
  """
  # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
  # Cannot be imported at top because of circular dependencies.
  # TODO(omalleyt): Resolve circular dependency.
  from tensorflow.python.keras.engine import base_layer  # pylint: disable=g-import-not-at-top

  for tensor in nest.flatten(tensors):
    if getattr(tensor, '_keras_history', None) is not None:
      continue
    op = tensor.op  # The Op that created this Tensor.
    if op in processed_ops:
      continue

    if op.type.startswith('Sparse'):
      lambda_example = """ weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights) output = tf.keras.layers.Lambda(weights_mult)(input) """
      raise ValueError(
          'Sparse ops are not supported with functional models with built-in '
          'layer wrapping. Please wrap the sparse ops in a Lambda layer like'
          ': \n{lambda_example}\n'.format(lambda_example=lambda_example))

    # Recursively set `_keras_history`.
    constants = {}
    layer_inputs = []
    for position, op_input in enumerate(list(op.inputs)):
      if uses_keras_history(op_input):
        layer_inputs.append(op_input)
        continue
      # Treat any value not originating from a `keras.Input` as
      # a constant. Variables cannot be supported.
      legacy_graph_with_strategy = (
          distribution_strategy_context.in_cross_replica_context() and
          not ops.executing_eagerly_outside_functions())
      if legacy_graph_with_strategy:
        # In Legacy Graph mode, evaluating here makes Session be
        # configured improperly.
        constants[position] = op_input
      else:
        with ops.init_scope():
          constants[position] = backend.function([], op_input)([])

    processed_ops, created_layers = _create_keras_history_helper(
        layer_inputs, processed_ops, created_layers)
    layer_name = op.name
    serialized_node = op.node_def.SerializeToString()
    wrapper = base_layer.TensorFlowOpLayer(
        serialized_node, constants=constants, name=layer_name)
    created_layers.append(wrapper)
    wrapper._add_inbound_node(  # pylint: disable=protected-access
        layer_inputs, op.outputs)
    processed_ops.update([op])
  return processed_ops, created_layers
def is_in_tf_function():
  """Returns if inside of a tf.function.

  True only when all three hold: executing eagerly outside functions, not
  currently executing eagerly, and not inside a Keras graph.
  """
  if not ops.executing_eagerly_outside_functions():
    return False
  if context.executing_eagerly():
    return False
  return not is_in_keras_graph()
def check_graph_consistency(tensor=None, method='add_loss', force_raise=False):
  """Checks that tensors passed to `add_*` method match the Keras graph.

  When one of the `add_*` method is called inside a V2 conditional branch,
  the underlying tensor gets created in a FuncGraph managed by
  control_flow_v2. We need to raise clear error messages in such cases.

  The error is raised when `force_raise` is set, or when executing eagerly
  outside functions and `tensor.graph` is a cond-branch, while-cond or
  while-body FuncGraph. Otherwise the function returns without doing
  anything.

  Arguments:
    tensor: Tensor to check, or `False` if it is known that an error
      should be raised.
    method: Caller method, one of {'add_metric', 'add_loss', 'add_update',
      'activity_regularizer'}. Any value other than 'activity_regularizer',
      'add_metric' and 'add_loss' gets the `add_update` example text.
    force_raise: If an error should be raised regardless of `tensor`.

  Raises:
    RuntimeError: In case of an out-of-graph tensor.
  """
  if (force_raise or
      (ops.executing_eagerly_outside_functions() and
       hasattr(tensor, 'graph') and
       isinstance(tensor.graph,
                  (control_flow_util_v2.CondBranchFuncGraph,
                   control_flow_util_v2.WhileCondFuncGraph,
                   control_flow_util_v2.WhileBodyFuncGraph)))):
    # `activity_regularizer` has its own message and raises immediately.
    if method == 'activity_regularizer':
      bad_example = """ class TestModel(tf.keras.Model): def __init__(self): super(TestModel, self).__init__(name='test_model') self.dense = tf.keras.layers.Dense(2, activity_regularizer='l2') def call(self, x, training=None): if training: return self.dense(x) else: return self.dense(x) """
      correct_example = """ class TestModel(tf.keras.Model): def __init__(self): super(TestModel, self).__init__(name='test_model') self.dense = tf.keras.layers.Dense(2, activity_regularizer='l2') def call(self, x, training=None): return self.dense(x) """
      raise RuntimeError(
          'You are using a layer with `activity_regularizer` in a control flow '
          'branch, e.g.:\n{bad_example}\nThis is currently not supported. '
          'Please move your call to the layer with `activity_regularizer` out '
          'of the control flow branch, e.g.:\n{correct_example}\n'
          'You can also resolve this by marking your outer model/layer dynamic'
          ' (eager-only) by passing `dynamic=True` to the layer constructor. '
          'Any kind of control flow is supported with dynamic layers. '
          'Note that using `dynamic=True` requires you to implement static '
          'shape inference in the `compute_output_shape(input_shape)` '
          'method.'.format(bad_example=bad_example,
                           correct_example=correct_example))

    # Pick the example pair that matches the calling `add_*` method.
    if method == 'add_metric':
      bad_example = """ def call(self, inputs, training=None): if training: metric = compute_metric(inputs) self.add_metric(metric, name='my_metric', aggregation='mean') return inputs """
      correct_example = """ def call(self, inputs, training=None): if training: metric = compute_metric(inputs) else: metric = 0. self.add_metric(metric, name='my_metric', aggregation='mean') return inputs """
    elif method == 'add_loss':
      bad_example = """ def call(self, inputs, training=None): if training: loss = compute_loss(inputs) self.add_loss(loss) return inputs """
      correct_example = """ def call(self, inputs, training=None): if training: loss = compute_loss(inputs) else: loss = 0. self.add_loss(loss) return inputs """
    else:
      bad_example = """ def call(self, inputs, training=None): if training: self.add_update(self.w.assign_add(1)) return inputs """
      correct_example = """ def call(self, inputs, training=None): if training: increment = 1 else: increment = 0 self.add_update(self.w.assign_add(increment)) return inputs """
    raise RuntimeError(
        'You are using the method `{method}` in a control flow branch '
        'in your layer, e.g.:\n{bad_example}\n'
        'This is not currently supported. '
        'Please move your call to {method} out of the control flow branch, '
        'e.g.:\n{correct_example}\n'
        'You can also resolve this by marking your layer '
        'as dynamic (eager-only) by passing '
        '`dynamic=True` to the layer constructor. '
        'Any kind of control flow is supported with dynamic layers. '
        'Note that using `dynamic=True` requires you '
        'to implement static shape inference '
        'in the `compute_output_shape(input_shape)` method.'.format(
            method=method,
            bad_example=bad_example,
            correct_example=correct_example))
def _initialize_multi_worker(self, cluster_resolver):
  """Initializes the object for multi-worker training.

  Reads the cluster layout and this task's identity from `cluster_resolver`,
  configures collective ops on the eager context when applicable, builds the
  collective all-reduce cross-device ops, and then delegates to
  `_initialize_single_worker` with this task's local devices.

  Args:
    cluster_resolver: Object exposing `cluster_spec()`, `task_type`,
      `task_id` and `rpc_layer`. `num_accelerators()` is used unless it is a
      `TFConfigClusterResolver`; `port` is used if present.

  Raises:
    ValueError: If `task_type` or `task_id` is None, or if the worker count
      derived from the cluster spec is falsy.
  """
  cluster_spec = multi_worker_util.normalize_cluster_spec(
      cluster_resolver.cluster_spec())
  task_type = cluster_resolver.task_type
  task_id = cluster_resolver.task_id
  if task_type is None or task_id is None:
    raise ValueError(
        "When `cluster_spec` is given, you must also specify "
        "`task_type` and `task_id`.")
  self._cluster_spec = cluster_spec
  self._task_type = task_type
  self._task_id = task_id
  self._id_in_cluster = multi_worker_util.id_in_cluster(
      self._cluster_spec, self._task_type, self._task_id)

  self._num_workers = multi_worker_util.worker_count(
      cluster_spec, task_type)
  if not self._num_workers:
    raise ValueError(
        "No `worker`, `chief` or `evaluator` tasks can be found "
        "in `cluster_spec`.")

  self._is_chief = multi_worker_util.is_chief(cluster_spec, task_type,
                                              task_id)

  # All per-host state is pinned to this task's own job/task device.
  self._worker_device = "/job:%s/task:%d" % (task_type, task_id)
  self._host_input_device = numpy_dataset.SingleDevice(
      self._worker_device)

  # Collective ops are configured on the eager context only when this object
  # has not been marked as local / standalone-client mode.
  if (ops.executing_eagerly_outside_functions() and
      not getattr(self, "_local_or_standalone_client_mode", False)):
    context.context().configure_collective_ops(
        collective_leader=multi_worker_util.collective_leader(
            cluster_spec, task_type, task_id),
        scoped_allocator_enabled_ops=("CollectiveReduce", ),
        device_filters=("/job:%s/task:%d" % (task_type, task_id), ))
    self._collective_ops_configured = True

  # Starting a std server in eager mode and in independent worker mode.
  if (context.executing_eagerly() and
      not getattr(self, "_std_server_started", False) and
      not getattr(self, "_local_or_standalone_client_mode", False)):
    # Checking _local_or_standalone_client_mode as well because we should not
    # create the std server in standalone client mode.
    # Work on a copy so the context's own config object is not mutated here.
    config_proto = copy.deepcopy(context.context().config)
    config_proto = self._update_config_proto(config_proto)

    # Resolvers without a `port` attribute fall back to port 0.
    if hasattr(cluster_resolver, "port"):
      port = cluster_resolver.port
    else:
      port = 0
    server_def = tensorflow_server_pb2.ServerDef(
        cluster=cluster_spec.as_cluster_def(),
        default_session_config=config_proto,
        job_name=task_type,
        task_index=task_id,
        protocol=cluster_resolver.rpc_layer or "grpc",
        port=port)
    context.context().enable_collective_ops(server_def)
    # The flag is checked above, so a later call skips this whole branch.
    self._std_server_started = True
    # The `ensure_initialized` is needed before calling
    # `context.context().devices()`.
    context.context().ensure_initialized()
    logging.info(
        "Enabled multi-worker collective ops with available devices: %r",
        context.context().devices())

  # TODO(yuefengz): The `num_gpus` is only for this particular task. It
  # assumes all workers have the same number of GPUs. We should remove this
  # assumption by querying all tasks for their numbers of GPUs.
  # TODO(b/126786766): TFConfigClusterResolver returns wrong number of GPUs in
  # some cases.
  if isinstance(cluster_resolver, TFConfigClusterResolver):
    num_gpus = context.num_gpus()
  else:
    num_gpus = cluster_resolver.num_accelerators().get("GPU", 0)

  # With no GPUs the task's own worker device is the single local device.
  if num_gpus:
    local_devices = tuple("%s/device:GPU:%d" % (self._worker_device, i)
                          for i in range(num_gpus))
  else:
    local_devices = (self._worker_device, )

  self._collective_keys = cross_device_utils.CollectiveKeys()
  # Group size counts every local device on every worker (same-GPU-count
  # assumption noted in the TODO above).
  self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
      devices=local_devices,
      group_size=len(local_devices) * self._num_workers,
      collective_keys=self._collective_keys,
      communication=self._communication)
  # CrossDeviceOps for per host tensors.
  self._host_cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
      devices=[self._worker_device],
      group_size=self._num_workers,
      collective_keys=self._collective_keys,
      communication=cross_device_ops_lib.CollectiveCommunication.RING,
  )
  super(CollectiveAllReduceExtended,
        self)._initialize_single_worker(local_devices)

  # Add a default device so that ops without specified devices will not end up
  # on other workers.
  self._default_device = "/job:%s/task:%d" % (task_type, task_id)

  # Save the num_gpus_per_worker and rpc_layer for configure method.
  self._num_gpus_per_worker = num_gpus
  self._rpc_layer = cluster_resolver.rpc_layer
  self._warn_nccl_no_gpu()

  # TODO(b/151232436): Enable check health thread by default.
  if self._enable_check_health:
    self._start_check_health_thread()

  logging.info(
      "MultiWorkerMirroredStrategy with cluster_spec = %r, task_type = %r, "
      "task_id = %r, num_workers = %r, local_devices = %r, "
      "communication = %s", cluster_spec.as_dict(), task_type, task_id,
      self._num_workers, local_devices, self._communication)
def get_job_name(self):
  """Returns `self.task_type` as the job name when one applies, else None.

  A job name applies when eager execution is enabled outside functions, when
  `self._should_resolve()` is truthy, or when `is_running_in_gce()` is truthy.
  The checks run in that order and stop at the first truthy one.
  """
  job_name_applies = (
      ops.executing_eagerly_outside_functions() or
      self._should_resolve() or
      is_running_in_gce())
  return self.task_type if job_name_applies else None
def _instrument_symbolic_tensors(self, tensors, op_type, op_name,
                                 tfdbg_context_id, tensor_ids):
  """Add debugging instrumentation for symbolic (i.e., non-eager) tensors.

  The detailed fashion in which the tensors are instrumented is determined
  by the tensor_debug_mode configured for the currently enabled dumping
  callback.

  Args:
    tensors: A tuple of Tensors to instrument. It is assumed that their
      ordering corresponds to the ordering of output tensors of an original
      op. Output slot indices (0-based) will be generated based on the
      ordering.
    op_type: Type name of the op that emits the Tensors (e.g., "MatMul").
    op_name: Name of the op that emits the Tensors (e.g., "dense_1/MatMul").
    tfdbg_context_id: A unique ID for the context that the op belongs to
      (e.g., a graph).
    tensor_ids: A list of unique ID numbers for the tensors, for tfdbg's
      internal use.

  Returns:
    Non-eager Tensors that override the `tensors` as the output of the op
    that originally generated `tensors`. In some cases (e.g., non-V1 graph
    mode), this may be `None`, as the instrumentation can simply rely on
    automatic control dependencies (see `auto_control_deps.py`) instead of
    tensor overriding.

  Raises:
    NotImplementedError: If the configured tensor debug mode is not one of
      NO_TENSOR, CURT_HEALTH, CONCISE_HEALTH, FULL_HEALTH, SHAPE or
      FULL_TENSOR.
  """
  tensor_debug_mode = self._tensor_debug_mode
  debug_urls = ["file://%s" % self._dump_root]
  is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
  # Outside V1 graph mode nothing is overridden, hence the `None` result.
  instrumented_tensors = [] if is_v1_graph_mode else None
  if tensor_debug_mode == debug_event_pb2.TensorDebugMode.NO_TENSOR:
    for output_slot, tensor in enumerate(tensors):
      # Tensors that are filtered out, or whose dtype is not numpy-compatible,
      # are passed through untouched. Because this check already skips every
      # non-numpy-compatible dtype, no additional V1-graph-mode check on
      # `is_numpy_compatible` is needed afterwards.
      if (not self._should_dump_tensor(op_type, tensor.dtype) or
          not tensor.dtype.is_numpy_compatible):
        if is_v1_graph_mode:
          instrumented_tensors.append(tensor)
        continue
      # Except in V1 graph mode + control flow, debug_identity_v2 triggers
      # auto control dependency because it's a stateful op.
      debug_tensor = gen_debug_ops.debug_identity_v2(
          # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
          # as a low-overhead placeholder, since no actual tensor value is
          # traced.
          constant_op.constant([], dtype=dtypes.float32),
          tfdbg_context_id=tfdbg_context_id,
          op_name=op_name,
          output_slot=output_slot,
          tensor_debug_mode=self._tensor_debug_mode,
          debug_urls=debug_urls)
      if is_v1_graph_mode:
        instrumented_tensors.append(
            self._process_v1_graph_mode_tensor(
                op_type, tensor, debug_tensor, tensor_debug_mode))
    return instrumented_tensors
  elif tensor_debug_mode in (
      debug_event_pb2.TensorDebugMode.CURT_HEALTH,
      debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
      debug_event_pb2.TensorDebugMode.FULL_HEALTH,
      debug_event_pb2.TensorDebugMode.SHAPE):
    for output_slot, tensor in enumerate(tensors):
      dtype = tensor.dtype
      # Health modes summarize floating dtypes only; SHAPE mode also accepts
      # integer and bool dtypes.
      dtype_is_dumpable = (
          tensor_debug_mode in (
              debug_event_pb2.TensorDebugMode.CURT_HEALTH,
              debug_event_pb2.TensorDebugMode.CONCISE_HEALTH,
              debug_event_pb2.TensorDebugMode.FULL_HEALTH) and
          dtype.is_floating or
          tensor_debug_mode == debug_event_pb2.TensorDebugMode.SHAPE and
          (dtype.is_floating or dtype.is_integer or dtype.is_bool))
      if (not self._should_dump_tensor(op_type, tensor.dtype) or
          not dtype_is_dumpable):
        if is_v1_graph_mode:
          instrumented_tensors.append(tensor)
        continue
      # The numeric summary (not the tensor itself) is what gets dumped.
      debug_tensor = gen_debug_ops.debug_identity_v2(
          gen_debug_ops.debug_numeric_summary_v2(
              tensor,
              tensor_id=tensor_ids[output_slot],
              tensor_debug_mode=self._tensor_debug_mode,
              output_dtype=dtypes.float64),
          tfdbg_context_id=tfdbg_context_id,
          op_name=op_name,
          output_slot=output_slot,
          tensor_debug_mode=self._tensor_debug_mode,
          debug_urls=debug_urls)
      if is_v1_graph_mode:
        instrumented_tensors.append(
            self._process_v1_graph_mode_tensor(
                op_type, tensor, debug_tensor, tensor_debug_mode))
    return instrumented_tensors
  elif tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR:
    for output_slot, tensor in enumerate(tensors):
      if (not self._should_dump_tensor(op_type, tensor.dtype) or
          not tensor.dtype.is_numpy_compatible):
        # Instrumenting DT_VARIANT and DT_RESOURCE type tensors under
        # V1 graph mode is known to have issues. TODO(cais): Investigate.
        if is_v1_graph_mode:
          instrumented_tensors.append(tensor)
        continue
      debug_tensor = gen_debug_ops.debug_identity_v2(
          tensor,
          tfdbg_context_id=tfdbg_context_id,
          op_name=op_name,
          output_slot=output_slot,
          tensor_debug_mode=self._tensor_debug_mode,
          debug_urls=debug_urls)
      if is_v1_graph_mode:
        instrumented_tensors.append(
            self._process_v1_graph_mode_tensor(
                op_type, tensor, debug_tensor, tensor_debug_mode))
    return instrumented_tensors
  else:
    raise NotImplementedError(
        "Symbolic tensor instrumentation is not implemented for debug mode "
        "%s" % self._tensor_debug_mode)
def clone_and_build_model(
    model, input_tensors=None, target_tensors=None, custom_objects=None,
    compile_clone=True, in_place_reset=False, optimizer_iterations=None,
    optimizer_config=None):
  """Clone a `Model` and build/compile it with the same settings used before.

  This function can be run in the same graph or in a separate graph from the
  model. When using a separate graph, `in_place_reset` must be `False`.

  Note that, currently, the clone produced from this function may not work
  with TPU DistributionStrategy. Try at your own risk.

  Args:
    model: `tf.keras.Model` object. Can be Functional, Sequential, or
      sub-classed.
    input_tensors: Optional list or dictionary of input tensors to build the
      model upon. If not provided, placeholders will be created.
    target_tensors: Optional list of target tensors for compiling the model.
      If not provided, placeholders will be created.
    custom_objects: Optional dictionary mapping string names to custom
      classes or functions.
    compile_clone: Boolean, whether to compile model clone (default `True`).
    in_place_reset: Boolean, whether to reset the model in place. Only
      consulted for a subclassed model whose `get_config`/`from_config` raises
      `NotImplementedError`; in that case it must be `True`, otherwise a
      `ValueError` is raised (default `False`). To restore the original model,
      use the function `in_place_subclassed_model_state_restoration(model)`.
    optimizer_iterations: An iterations variable that will be incremented by
      the optimizer if the clone is compiled. This argument is used when a
      Keras model is cloned into an Estimator model function, because
      Estimators create their own global step variable.
    optimizer_config: Optimizer config dictionary or list of dictionary
      returned from `get_config()`. This argument should be defined if
      `clone_and_build_model` is called in a different graph or session from
      the original model, and the optimizer is an instance of `OptimizerV2`.

  Returns:
    Clone of the model. For the in-place reset path this is the original
    `model` object itself.

  Raises:
    ValueError: Cloning fails in the following cases
      - cloning a subclassed model with `in_place_reset` set to False.
      - compiling the clone when the original model has not been compiled.
  """
  # Grab optimizer now, as we reset-in-place for subclassed models, but
  # want to maintain access to the original optimizer.
  orig_optimizer = model.optimizer
  if compile_clone and not orig_optimizer:
    raise ValueError(
        'Error when cloning model: compile_clone was set to True, but the '
        'original model has not been compiled.')

  if compile_clone:
    compile_args = model._get_compile_args()  # pylint: disable=protected-access
    # Allows this method to be robust to switching graph and eager classes.
    # NOTE: this replaces the method on the original `model` instance with one
    # that returns the captured arguments.
    model._get_compile_args = lambda: compile_args

  with CustomObjectScope(custom_objects or {}):
    if model._is_graph_network:
      clone = clone_model(model, input_tensors=input_tensors)
    elif isinstance(model, Sequential):
      clone = clone_model(model, input_tensors=input_tensors)
      # A Sequential clone that is not a graph network is built from the
      # original's recorded input shape, when one exists.
      if (not clone._is_graph_network and
          model._build_input_shape is not None):
        if ops.executing_eagerly_outside_functions():
          clone.build(model._build_input_shape)
        else:
          clone._set_inputs(
              K.placeholder(
                  model._build_input_shape, dtype=model.inputs[0].dtype))
    else:
      try:
        # Prefer cloning the model if serial/deserial logic is implemented for
        # subclassed model.
        clone = model.__class__.from_config(model.get_config())
      except NotImplementedError:
        logging.warning('This model is a subclassed model. Please implement '
                        '`get_config` and `from_config` to better support '
                        'cloning the model.')
        if not in_place_reset:
          raise ValueError(
              'This model is a subclassed model. '
              'Such a model cannot be cloned, but there is a workaround where '
              'the model is reset in-place. To use this, please set the '
              'argument `in_place_reset` to `True`. This will reset the '
              'attributes in the original model. To restore the attributes, '
              'call `in_place_subclassed_model_state_restoration(model)`.')
        # Fall back to reusing (and resetting) the original object.
        clone = model
        _in_place_subclassed_model_reset(clone)
      if input_tensors is not None:
        # A single-element list/tuple is unwrapped before setting inputs.
        if isinstance(input_tensors, (list, tuple)) and len(input_tensors) == 1:
          input_tensors = input_tensors[0]
        clone._set_inputs(input_tensors)

  if compile_clone:
    if isinstance(orig_optimizer, optimizer_v1.TFOptimizer):
      optimizer = optimizer_v1.TFOptimizer(
          orig_optimizer.optimizer, optimizer_iterations)
      K.track_tf_optimizer(optimizer)
    else:
      # Normalize to a list so single and multiple optimizers share one path.
      if not isinstance(orig_optimizer, (tuple, list)):
        orig_optimizer = [orig_optimizer]
      if optimizer_config is None:
        optimizer = [
            opt.__class__.from_config(opt.get_config())
            for opt in orig_optimizer
        ]
      elif isinstance(optimizer_config, dict):
        # A single config dict rebuilds only the first original optimizer.
        optimizer = [orig_optimizer[0].__class__.from_config(optimizer_config)]
      else:
        # optimizer config is list of dict, same order as orig_optimizer.
        optimizer = [
            opt.__class__.from_config(opt_config)
            for (opt, opt_config) in zip(orig_optimizer, optimizer_config)
        ]
      if optimizer_iterations is not None:
        for opt in optimizer:
          opt.iterations = optimizer_iterations

      # Undo the list wrapping when there is exactly one optimizer.
      if len(optimizer) == 1:
        optimizer = optimizer[0]

    compile_args['optimizer'] = optimizer
    if target_tensors is not None:
      compile_args['target_tensors'] = target_tensors
    # Ensure Metric objects in new model are separate from existing model.
    compile_args['metrics'] = metrics_module.clone_metrics(
        compile_args['metrics'])
    compile_args['weighted_metrics'] = metrics_module.clone_metrics(
        compile_args['weighted_metrics'])
    clone.compile(**compile_args)

  return clone
def _in_place_subclassed_model_reset(model):
  """Substitute for model cloning that works for subclassed models.

  Subclassed models cannot be cloned because their topology is not
  serializable. To "instantiate" an identical model in a new TF graph, we
  reuse the original model object, but we clear its state.

  After calling this function on a model instance, you can use the model
  instance as if it were a model clone (in particular you can use it in a new
  graph).

  This method clears the state of the input model. It is thus destructive.
  However the original state can be restored fully by calling
  `_in_place_subclassed_model_state_restoration`.

  The original layers and, if the model is built, a fixed list of
  build/compile attributes are stored on `model._original_attributes_cache`.

  Args:
    model: Instance of a Keras model created via subclassing.

  Raises:
    ValueError: In case the model uses a subclassed model as inner layer, has
      a layer attribute that itself exposes non-empty `layers`, or has a
      non-empty list/tuple attribute consisting only of layers.
  """
  assert not model._is_graph_network  # Only makes sense for subclassed networks
  # Select correct base class for new Model.
  # NOTE(review): the return value is discarded, so this presumably relies on
  # `swap_class` modifying the class in place -- confirm.
  version_utils.swap_class(model.__class__, training.Model, training_v1.Model,
                           ops.executing_eagerly_outside_functions())
  # Retrieve all layers tracked by the model as well as their attribute names
  attributes_cache = {}
  for name in dir(model):
    # Skip attrs that track other trackables.
    if name == 'submodules' or name == '_self_tracked_trackables':
      continue

    # Some attributes raise on access; those are ignored.
    try:
      value = getattr(model, name)
    except (AttributeError, ValueError, TypeError):
      continue
    if isinstance(value, Layer):
      attributes_cache[name] = value
      assert value in model.layers
      if hasattr(value, 'layers') and value.layers:
        raise ValueError('We do not support the use of nested layers '
                         'in `model_to_estimator` at this time. Found nested '
                         'layer: %s' % value)
    elif isinstance(
        value, (list, tuple)) and name not in ('layers', '_layers', 'metrics',
                                               '_compile_metric_functions',
                                               '_output_loss_metrics'):
      # Handle case: list/tuple of layers (also tracked by the Network API).
      if value and all(isinstance(val, Layer) for val in value):
        raise ValueError('We do not support the use of list-of-layers '
                         'attributes in subclassed models used with '
                         '`model_to_estimator` at this time. Found list '
                         'model: %s' % name)

  # Replace layers on the model with fresh layers
  # Reverse map (layer -> attribute name) used to re-assign each fresh layer
  # under the attribute that held the original.
  layers_to_names = {value: key for key, value in attributes_cache.items()}
  original_layers = list(
      model._flatten_layers(include_self=False, recursive=False))
  # Attribute tracking is switched off while layers are swapped and restored
  # on the last line. It is not restored if an exception is raised in between.
  setattr_tracking = model._setattr_tracking
  model._setattr_tracking = False
  model._self_tracked_trackables = []
  for layer in original_layers:  # We preserve layer order.
    config = layer.get_config()
    # This will not work for nested subclassed models used as layers.
    # This would be theoretically possible to support, but would add complexity.
    # Only do it if users complain.
    if isinstance(layer, training.Model) and not layer._is_graph_network:
      raise ValueError('We do not support the use of nested subclassed models '
                       'in `model_to_estimator` at this time. Found nested '
                       'model: %s' % layer)
    fresh_layer = layer.__class__.from_config(config)
    # Raises KeyError if the layer was not found as a direct attribute above.
    name = layers_to_names[layer]
    setattr(model, name, fresh_layer)
    model._self_tracked_trackables.append(fresh_layer)

  # Cache original model build attributes (in addition to layers)
  if (not hasattr(model, '_original_attributes_cache') or
      model._original_attributes_cache is None):
    if model.built:
      attributes_to_cache = [
          'inputs',
          'outputs',
          'total_loss',
          'optimizer',
          'train_function',
          'test_function',
          'predict_function',
          '_training_endpoints',
          '_collected_trainable_weights',
          '_feed_inputs',
          '_feed_input_names',
          '_feed_input_shapes',
      ]
      for name in attributes_to_cache:
        attributes_cache[name] = getattr(model, name)
  model._original_attributes_cache = attributes_cache
  _reset_build_compile_trackers(model)
  model._setattr_tracking = setattr_tracking
def cluster_spec(self):
  """Builds a ClusterSpec from the current TPU host information.

  When the TPU name has to be resolved, the metadata is fetched again on every
  call of this method.

  Returns:
    A ClusterSpec containing the TPU hosts under `self.task_type` (plus the
    coordinator, if a coordinator address is set), or None when the TPU is
    not resolved, no `rpc_layer` is set and eager execution is not enabled
    outside functions.

  Raises:
    RuntimeError: If the fetched TPU metadata reports a state other than
      'READY'.
  """
  ############################################################################
  # There are 5 potential cases this code must handle:
  #  1. [Normal case.] We should resolve the TPU name to a set of tasks, and
  #      a. Create a ClusterSpec that includes the coordinator job
  #      b. Create a ClusterSpec without the coordinator job.
  #  2. [GKE / No API Access.] We should not resolve the TPU name to a set of
  #     tasks and
  #      a. Create a ClusterSpec with the coordinator
  #      b. Create a ClusterSpec without the coordinator
  #  3. [Other (legacy non-gRPC).] We should return None.
  ############################################################################
  jobs = {}
  if self._should_resolve():
    # Case 1.
    metadata = self._fetch_cloud_tpu_metadata()  # pylint: disable=protected-access

    if 'state' in metadata and metadata['state'] != 'READY':
      raise RuntimeError(
          'TPU "%s" is not yet ready; state: "%s"' %
          (compat.as_text(self._tpu), metadata['state']))

    if 'networkEndpoints' in metadata:
      hosts = [
          '%s:%s' % (entry['ipAddress'], entry['port'])
          for entry in metadata['networkEndpoints']
      ]
    else:
      # Fall back to the deprecated response format
      hosts = ['%s:%s' % (metadata['ipAddress'], metadata['port'])]

    jobs[self.task_type] = hosts
  else:
    eager_enabled = ops.executing_eagerly_outside_functions()
    if self.rpc_layer is None and not eager_enabled:
      # Case 3.
      return None
    # Case 2.
    # We are working around the fact that GKE environment variable that is
    # supplied to us has the protocol string embedded in it, but we want
    # to strip it out for the ClusterSpec.
    hosts = [
        address[len(self.rpc_layer + '://'):]
        if (self.rpc_layer is not None and
            address.startswith(self.rpc_layer + '://'))
        else address
        for address in compat.as_text(self._tpu).split(_ENDPOINTS_SEPARATOR)
    ]
    jobs[self.task_type] = hosts

  if self._coordinator_address:
    # {1, 2}.a
    jobs[self._coordinator_name] = [self._coordinator_address]

  return server_lib.ClusterSpec(jobs)
def f():
  """Runs `opt.minimize` on `x + y` and returns the op unless in eager mode.

  Returns:
    None when eager execution is enabled outside functions, otherwise the
    value returned by `opt.minimize`.
  """
  # Minimize both the AutoCastVariable and the normal tf.Variable. Both
  # variables should be updated to the same value.
  minimize_result = opt.minimize(lambda: x + y, var_list=[x, y])
  if ops.executing_eagerly_outside_functions():
    return None
  return minimize_result
def _check_model_class(self, model_class):
  """Asserts that `model_class` matches the current execution mode.

  `training.Model` is expected when eager execution is enabled outside
  functions, `training_v1.Model` otherwise.
  """
  if ops.executing_eagerly_outside_functions():
    expected_class = training.Model
  else:
    expected_class = training_v1.Model
  self.assertEqual(model_class, expected_class)
def _initialize_local(self, cluster_resolver, devices=None):
  """Initializes the object for local training.

  Sets this object up as a single-worker (chief) strategy: optionally
  configures collective ops on the eager context, picks the local devices,
  builds the collective all-reduce cross-device ops and delegates to
  `_initialize_single_worker`.

  Args:
    cluster_resolver: Object exposing `rpc_layer`. `num_accelerators()` is
      used unless it is a `TFConfigClusterResolver`.
    devices: Optional devices to use as the local devices. When falsy, the
      local devices are derived from the detected number of GPUs.
  """
  self._is_chief = True
  self._num_workers = 1

  if ops.executing_eagerly_outside_functions():
    # Best effort: a RuntimeError from configuring collective ops is logged as
    # a warning rather than propagated.
    try:
      context.context().configure_collective_ops(
          scoped_allocator_enabled_ops=("CollectiveReduce", ))
    except RuntimeError:
      logging.warning(
          "Collective ops is not configured at program startup. "
          "Some performance features may not be enabled.")
    self._collective_ops_configured = True

  # TODO(b/126786766): TFConfigClusterResolver returns wrong number of GPUs in
  # some cases.
  if isinstance(cluster_resolver, TFConfigClusterResolver):
    num_gpus = context.num_gpus()
  else:
    num_gpus = cluster_resolver.num_accelerators().get("GPU", 0)

  # Explicitly passed devices win; otherwise use every GPU, or the CPU if
  # there are none.
  if devices:
    local_devices = devices
  else:
    if num_gpus:
      local_devices = tuple("/device:GPU:%d" % i for i in range(num_gpus))
    else:
      local_devices = ("/device:CPU:0", )

  self._worker_device = device_util.canonicalize("/device:CPU:0")
  self._host_input_device = numpy_dataset.SingleDevice(
      self._worker_device)

  self._collective_keys = cross_device_utils.CollectiveKeys()
  self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
      devices=local_devices,
      group_size=len(local_devices),
      collective_keys=self._collective_keys,
      communication=self._communication)
  # CrossDeviceOps for per host tensors.
  self._host_cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
      devices=[self._worker_device],
      group_size=self._num_workers,
      collective_keys=self._collective_keys,
      communication=cross_device_ops_lib.CollectiveCommunication.RING,
  )

  super(CollectiveAllReduceExtended,
        self)._initialize_single_worker(local_devices)

  # No cluster information exists in local mode.
  self._cluster_spec = None
  self._task_type = None
  self._task_id = None
  self._id_in_cluster = 0

  # This is a mark to tell whether we are running with standalone client or
  # independent worker. Right now with standalone client, strategy object is
  # created as local strategy and then turn into multi-worker strategy via
  # configure call.
  self._local_or_standalone_client_mode = True

  # Save the num_gpus_per_worker and rpc_layer for configure method.
  self._num_gpus_per_worker = num_gpus
  self._rpc_layer = cluster_resolver.rpc_layer
  self._warn_nccl_no_gpu()

  logging.info(
      "Single-worker MultiWorkerMirroredStrategy with local_devices "
      "= %r, communication = %s", local_devices, self._communication)
def __new__(cls, *args, **kwargs):  # pylint: disable=unused-argument
  """Allocates the instance from the class returned by `swap_class`.

  `swap_class` is given `training.Model`, `training_v1.Model` and a flag
  telling whether eager execution is enabled outside functions; the class it
  returns is the one the new object is created from.
  """
  use_v2 = ops.executing_eagerly_outside_functions()
  selected_cls = swap_class(cls, training.Model, training_v1.Model, use_v2)
  return super(ModelVersionSelector, selected_cls).__new__(selected_cls)
def callback(self, op_type, inputs, attrs, outputs, op_name=None, graph=None):
  """Eager-function unified callback for checking numerics.

  Args:
    op_type: Type name of the op whose outputs are checked.
    inputs: Input tensors of the op.
    attrs: Unused.
    outputs: Output tensors of the op.
    op_name: Unused.
    graph: The graph the op belongs to; falsy when the op runs eagerly.

  Returns:
    None for skipped op types and in eager mode. Under graph mode, a list with
    one entry per output: the instrumented tensor for checked outputs, or the
    original tensor for outputs that are not checked.
  """
  del attrs, op_name  # Unused
  op_type_bytes = compat.as_bytes(op_type)
  is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
  if (op_type_bytes in op_callbacks_common.OP_CALLBACK_SKIP_OPS or
      op_type_bytes in SAFE_OPS):
    return None
  if graph:
    # Under graph mode. Insert check_numerics op.
    instrumented_outputs = []
    if is_v1_graph_mode:
      # For inputs that have a recorded debug tensor, make the first output's
      # op depend on that debug tensor's op.
      for input_tensor in inputs:
        if input_tensor in self._placeholder_to_debug_tensor and outputs:
          outputs[0].op._add_control_input(  # pylint: disable=protected-access
              self._placeholder_to_debug_tensor[input_tensor].op)
    for slot, output in enumerate(outputs):
      # Only floating-point outputs that are not explicitly ignored are
      # checked; everything else is passed through unchanged.
      if (output.dtype.is_floating and
          (op_type_bytes, slot) not in IGNORE_OP_OUTPUTS):
        # NOTE(review): unlike the eager path below, this error message is
        # built without `stack_height_limit` / `path_length_limit` -- confirm
        # whether that is intentional.
        checked_output = array_ops.check_numerics_v2(
            # TF v2 has automatic control dependencies added to stateful async
            # ops, which allows us to run check_numerics asynchronously.
            # In the above case we use debug_summary to reduce all output
            # tensors asynchronously from the op being checked and then
            # process the tensor summary with check_numerics.
            output if is_v1_graph_mode else _debug_summary(output),
            get_check_numerics_error_message(
                slot,
                len(outputs),
                op_type,
                output,
                inputs,
                graph=graph,
                traceback=output.op.traceback))
        # Record which original tensor each check op was created for.
        _CHECK_NUMERICS_INPUT_LOOKUP[graph][
            checked_output.name] = output
        instrumented_outputs.append(
            self._get_output_tensor(op_type_bytes, output, checked_output,
                                    is_v1_graph_mode))
      else:
        instrumented_outputs.append(output)
    return instrumented_outputs
  else:
    if op_type_bytes == b"CheckNumericsV2":
      # TODO(b/140334369): Remove this special casing logic once op_callback.
      # automatically prevents infinite recursion in eager mode.
      return None
    # Under eager mode. Eagerly execute check_numerics op.
    for slot, output in enumerate(outputs):
      if (output.dtype.is_floating and
          (op_type_bytes, slot) not in IGNORE_OP_OUTPUTS):
        array_ops.check_numerics_v2(
            output,
            get_check_numerics_error_message(
                slot,
                len(outputs),
                op_type,
                output,
                inputs,
                stack_height_limit=self._stack_height_limit,
                path_length_limit=self._path_length_limit))
def __new__(cls, *args, **kwargs):  # pylint: disable=unused-argument
  """Allocates the instance from the class returned by `swap_class`.

  `swap_class` is given `base_layer.Layer`, `base_layer_v1.Layer` and a flag
  telling whether eager execution is enabled outside functions; the class it
  returns is the one the new object is created from.
  """
  use_v2 = ops.executing_eagerly_outside_functions()
  selected_cls = swap_class(cls, base_layer.Layer, base_layer_v1.Layer, use_v2)
  return super(LayerVersionSelector, selected_cls).__new__(selected_cls)
def _use_collective_v2(self):
  """Returns the class-level V2 collective preference, or False.

  The preference stored on `CollectiveReplicaLauncher` is only honoured when
  eager execution is enabled outside functions; otherwise False is returned.
  """
  if ops.executing_eagerly_outside_functions():
    return CollectiveReplicaLauncher._prefer_collective_v2
  return False
def _distributed_apply(distribution, grads_and_vars, name, apply_state):
  """`apply_gradients` using a `DistributionStrategy`.

  NOTE: `self` is not a parameter of this function; it is resolved from an
  enclosing scope that is not visible in this block.

  Args:
    distribution: Strategy object whose `extended` API is used to reduce
      gradients, colocate and run the per-variable updates.
    grads_and_vars: Iterable of (gradient, variable) pairs. It is iterated
      more than once here.
    name: Optional name scope for the update ops; `self._name` is used when
      falsy.
    apply_state: Value forwarded as `apply_state` to the dense/sparse apply
      methods that accept it.

  Returns:
    The op of the `self._iterations` increment (with control dependencies on
    all update ops) when not executing eagerly or when any update is
    symbolic; otherwise the result of `self._iterations.assign_add(1)`.
  """
  reduced_grads = distribution.extended.batch_reduce_to(
      ds_reduce_util.ReduceOp.SUM, grads_and_vars)
  var_list = [v for _, v in grads_and_vars]
  # Re-pair each variable with its reduced gradient.
  grads_and_vars = zip(reduced_grads, var_list)

  def apply_grad_to_update_var(var, grad):
    """Apply gradient to variable."""
    if isinstance(var, ops.Tensor):
      raise NotImplementedError("Trying to update a Tensor ", var)

    apply_kwargs = {}
    if not isinstance(var, de.TrainableWrapper):
      # Variables that are not `de.TrainableWrapper`: plain dense/sparse apply.
      if isinstance(grad, ops.IndexedSlices):
        if var.constraint is not None:
          raise RuntimeError(
              "Cannot use a constraint function on a sparse variable."
          )
        if "apply_state" in self._sparse_apply_args:
          apply_kwargs["apply_state"] = apply_state
        return self._resource_apply_sparse_duplicate_indices(
            grad.values, var, grad.indices, **apply_kwargs)

      if "apply_state" in self._dense_apply_args:
        apply_kwargs["apply_state"] = apply_state
      update_op = self._resource_apply_dense(grad, var, **apply_kwargs)
      if var.constraint is not None:
        with ops.control_dependencies([update_op]):
          return var.assign(var.constraint(var))
      else:
        return update_op
    else:
      # `de.TrainableWrapper` variables: read the variable and its slots after
      # the gradient (`_before`), apply the gradient after those reads, and
      # call `update_op()` on the variable and slots after the apply.
      with ops.colocate_with(None, ignore_existing=True):
        _slots = [
            self.get_slot(var, _s) for _s in self.get_slot_names()
        ]
        with ops.control_dependencies([grad]):
          _before = [var.read_value()
                    ] + [_s.read_value() for _s in _slots]
        if isinstance(grad, ops.IndexedSlices):
          if var.constraint is not None:
            raise RuntimeError(
                "Cannot use a constraint function on a sparse variable."
            )
          if "apply_state" in self._sparse_apply_args:
            apply_kwargs["apply_state"] = apply_state
          with ops.control_dependencies(_before):
            _apply_op = self._resource_apply_sparse_duplicate_indices(
                grad.values, var, grad.indices, **apply_kwargs)
          with ops.control_dependencies([_apply_op]):
            _after = control_flow_ops.group(
                [var.update_op()] + [_s.update_op() for _s in _slots])
            return _after

        if "apply_state" in self._dense_apply_args:
          apply_kwargs["apply_state"] = apply_state
        with ops.control_dependencies(_before):
          update_op = self._resource_apply_dense(
              grad, var, **apply_kwargs)
        if var.constraint is not None:
          # NOTE(review): this branch returns the constraint assignment and
          # does not call `update_op()` on the variable/slots as the other
          # wrapper branches do -- confirm this is intended.
          with ops.control_dependencies([update_op]):
            return var.assign(var.constraint(var))
        else:
          with ops.control_dependencies([update_op]):
            _after = control_flow_ops.group(
                [var.update_op()] + [_s.update_op() for _s in _slots])
            return _after

  update_ops = []
  with backend.name_scope(name or self._name):
    for grad, var in grads_and_vars:
      scope_name = ("update"
                    if ops.executing_eagerly_outside_functions() else
                    "update_" + var.op.name)
      # Colocate the update with variables to avoid unnecessary communication
      # delays. See b/136304694.
      with backend.name_scope(
          scope_name), distribution.extended.colocate_vars_with(
              var):
        update_ops.extend(
            distribution.extended.update(var,
                                         apply_grad_to_update_var,
                                         args=(grad, ),
                                         group=False))

    any_symbolic = any(
        isinstance(i, ops.Operation) or tf_utils.is_symbolic_tensor(i)
        for i in update_ops)
    if not context.executing_eagerly() or any_symbolic:
      # If the current context is graph mode or any of the update ops are
      # symbolic then the step update should be carried out under a graph
      # context. (eager updates execute immediately)
      with ops._get_graph_from_inputs(update_ops).as_default():  # pylint: disable=protected-access
        with ops.control_dependencies(update_ops):
          return self._iterations.assign_add(1).op

    return self._iterations.assign_add(1)
def check_graph_consistency(tensor=None, method='add_loss', force_raise=False):
  """Raises if an `add_*` call happens inside a V2 control flow branch.

  When one of the `add_*` methods is called inside a V2 conditional or loop,
  the underlying tensor gets created in a FuncGraph managed by
  control_flow_v2 instead of the Keras graph. This helper detects that
  situation and raises an error that shows how to restructure the code.

  Arguments:
    tensor: Tensor to check. Not inspected when `force_raise` is truthy.
    method: Caller method, one of {'add_metric', 'add_loss', 'add_update'}.
      Any value other than 'add_metric' and 'add_loss' is reported with the
      `add_update` examples.
    force_raise: If an error should be raised regardless of `tensor`.

  Raises:
    RuntimeError: In case of an out-of-graph tensor, or when `force_raise` is
      truthy.
  """
  out_of_graph = force_raise or (
      ops.executing_eagerly_outside_functions() and
      hasattr(tensor, 'graph') and
      isinstance(tensor.graph,
                 (control_flow_util_v2.CondBranchFuncGraph,
                  control_flow_util_v2.WhileCondFuncGraph,
                  control_flow_util_v2.WhileBodyFuncGraph)))
  if not out_of_graph:
    return

  # Pick the pair of code samples that matches the calling API.
  if method == 'add_metric':
    failing_snippet = """
      def call(self, inputs, training=None):
        if training:
          metric = compute_metric(inputs)
          self.add_metric(metric, name='my_metric', aggregation='mean')
        return inputs
      """
    working_snippet = """
      def call(self, inputs, training=None):
        if training:
          metric = compute_metric(inputs)
        else:
          metric = 0.
        self.add_metric(metric, name='my_metric', aggregation='mean')
        return inputs
      """
  elif method == 'add_loss':
    failing_snippet = """
      def call(self, inputs, training=None):
        if training:
          loss = compute_loss(inputs)
          self.add_loss(loss)
        return inputs
      """
    working_snippet = """
      def call(self, inputs, training=None):
        if training:
          loss = compute_loss(inputs)
        else:
          loss = 0.
        self.add_loss(loss)
        return inputs
      """
  else:
    failing_snippet = """
      def call(self, inputs, training=None):
        if training:
          self.add_update(self.w.assign_add(1))
        return inputs
      """
    working_snippet = """
      def call(self, inputs, training=None):
        if training:
          increment = 1
        else:
          increment = 0
        self.add_update(self.w.assign_add(increment))
        return inputs
      """

  error_template = (
      'You are using the method `{method}` in a control flow branch '
      'in your layer, e.g.:\n{bad_example}\n'
      'This is not currently supported. '
      'You should either use static control flow (`tf.cond`) '
      'or move your call to {method} out of the control flow branch, '
      'e.g.:\n{correct_example}\n'
      'You can also resolve this by marking your layer '
      'as dynamic (eager-only) by passing '
      '`dynamic=True` to the layer constructor. '
      'Any kind of control flow is supported with dynamic layers. '
      'Note that using `dynamic=True` requires you '
      'to implement static shape inference '
      'in the `compute_output_shape(input_shape)` method.')
  raise RuntimeError(
      error_template.format(
          method=method,
          bad_example=failing_snippet,
          correct_example=working_snippet))
def add_weight(self,
               name,
               shape,
               dtype=None,
               initializer=None,
               regularizer=None,
               trainable=None,
               constraint=None,
               use_resource=None,
               synchronization=vs.VariableSynchronization.AUTO,
               aggregation=vs.VariableAggregation.NONE,
               partitioner=None,
               **kwargs):
  """Adds a new variable to the layer, or gets an existing one; returns it.

  Arguments:
    name: variable name.
    shape: variable shape.
    dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
    initializer: initializer instance (callable).
    regularizer: regularizer instance (callable).
    trainable: whether the variable should be part of the layer's
      "trainable_variables" (e.g. variables, biases)
      or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
      Note, if the current variable scope is marked as non-trainable
      then this parameter is ignored and any added variables are also
      marked as non-trainable. `trainable` defaults to `True` unless
      `synchronization` is set to `ON_READ`.
    constraint: constraint instance (callable).
    use_resource: Whether to use `ResourceVariable`.
    synchronization: Indicates when a distributed variable will be
      aggregated. Accepted values are constants defined in the class
      `tf.VariableSynchronization`. By default the synchronization is set to
      `AUTO` and the current `DistributionStrategy` chooses
      when to synchronize. If `synchronization` is set to `ON_READ`,
      `trainable` must not be set to `True`.
    aggregation: Indicates how a distributed variable will be aggregated.
      Accepted values are constants defined in the class
      `tf.VariableAggregation`.
    partitioner: (optional) partitioner instance (callable). If
      provided, when the requested variable is created it will be split
      into multiple partitions according to `partitioner`. In this case,
      an instance of `PartitionedVariable` is returned. Available
      partitioners include `tf.compat.v1.fixed_size_partitioner` and
      `tf.compat.v1.variable_axis_size_partitioner`. For more details, see
      the documentation of `tf.compat.v1.get_variable` and the "Variable
      Partitioners and Sharding" section of the API guide.
    **kwargs: Additional keyword arguments. Only `experimental_autocast` is
      accepted; it is forwarded to the parent implementation.

  Returns:
    The created variable. Usually either a `Variable` or `ResourceVariable`
    instance. If `partitioner` is not `None`, a `PartitionedVariable`
    instance is returned.

  Raises:
    RuntimeError: If called with partitioned variable regularization and
      eager execution is enabled.
    TypeError: If an unsupported keyword argument is passed.
    ValueError: When trainable has been set to True with synchronization
      set as `ON_READ`.
  """
  for kwarg in kwargs:
    if kwarg != 'experimental_autocast':
      # Build one message string; passing the name as a second positional
      # argument would make the error render as a tuple repr.
      raise TypeError('Unknown keyword argument: {}'.format(kwarg))

  if self._keras_style:
    # NOTE(review): the caller's `synchronization` / `aggregation` are not
    # forwarded on this path; the defaults are always used. Confirm this is
    # intentional.
    return super(Layer, self).add_weight(
        name=name,
        shape=shape,
        dtype=dtype,
        initializer=initializer,
        regularizer=regularizer,
        trainable=trainable and self.trainable,
        constraint=constraint,
        use_resource=use_resource,
        synchronization=vs.VariableSynchronization.AUTO,
        aggregation=vs.VariableAggregation.NONE,
        partitioner=partitioner,
        **kwargs)

  if synchronization == vs.VariableSynchronization.ON_READ:
    if trainable:
      raise ValueError(
          'Synchronization value can be set to '
          'VariableSynchronization.ON_READ only for non-trainable variables. '
          'You have specified trainable=True and '
          'synchronization=VariableSynchronization.ON_READ.')
    else:
      # Set trainable to be false when variable is to be synced on read.
      trainable = False
  elif trainable is None:
    trainable = True

  def _should_add_regularizer(variable, existing_variable_set):
    """Returns True if no part of `variable` existed before this call."""
    if isinstance(variable, tf_variables.PartitionedVariable):
      return not any(var in existing_variable_set for var in variable)
    return variable not in existing_variable_set

  init_graph = None
  if not context.executing_eagerly():
    default_graph = ops.get_default_graph()
    if default_graph.building_function:
      with ops.init_scope():
        # Retrieve the variables from the graph into which variables
        # will be lifted; if initialization ops will be lifted into
        # the eager context, then there is nothing to retrieve, since variable
        # collections are not supported when eager execution is enabled.
        if not context.executing_eagerly():
          init_graph = ops.get_default_graph()
          existing_variables = set(tf_variables.global_variables())
    else:
      # Initialization ops will not be lifted out of the default graph.
      init_graph = default_graph
      existing_variables = set(tf_variables.global_variables())

  if dtype is None:
    dtype = self.dtype or dtypes.float32

  self._set_scope(None)
  reuse = self.built or self._reuse
  # Remember how many trainable weights existed so that weights added by this
  # call can be moved to the non-trainable list below if needed.
  prev_len_trainable = len(self._trainable_weights)
  with vs.variable_scope(
      self._scope, reuse=reuse, auxiliary_name_scope=False) as scope:
    self._current_scope = scope
    with ops.name_scope(self._name_scope()):
      use_resource = (use_resource or
                      self._use_resource_variables or
                      scope.use_resource)
      if initializer is None:
        initializer = scope.initializer
      variable = super(Layer, self).add_weight(
          name,
          shape,
          dtype=dtypes.as_dtype(dtype),
          initializer=initializer,
          trainable=trainable and self.trainable,
          constraint=constraint,
          partitioner=partitioner,
          use_resource=use_resource,
          synchronization=synchronization,
          aggregation=aggregation,
          getter=vs.get_variable,
          **kwargs)

      if regularizer:
        # `existing_variables` is only bound on the graph paths above; the
        # first operand short-circuits when running eagerly outside functions.
        if (ops.executing_eagerly_outside_functions() or
            _should_add_regularizer(variable, existing_variables)):
          self._handle_weight_regularization(name, variable, regularizer)

      if init_graph is not None:
        # Handle edge case where a custom getter has overridden `trainable`.
        # There is one known occurrence of this, in unit test
        # testBasicRNNCellNotTrainable in
        # contrib.rnn.python.kernel_tests.core_rnn_cell_test
        with init_graph.as_default():
          trainable_variables = tf_variables.trainable_variables()
        if (trainable and self.trainable and
            variable not in trainable_variables):
          # A custom getter / variable scope overrode the trainable flag.
          extra_trainable_vars = self._trainable_weights[prev_len_trainable:]
          self._trainable_weights = self._trainable_weights[
              :prev_len_trainable]
          self._non_trainable_weights += extra_trainable_vars
  return variable
def initialize_tpu_system(cluster_resolver=None):
  """Initialize the TPU devices.

  The resulting topology is also recorded in the module-level
  `_INITIALIZED_TPU_SYSTEMS` mapping, keyed by the TPU name. Calling this
  again for a TPU that is already in that mapping logs a warning and
  re-initializes it.

  Args:
    cluster_resolver: A tf.distribute.cluster_resolver.TPUClusterResolver,
      which provides information about the TPU cluster. When `None`, a
      `TPUClusterResolver("")` is created.

  Returns:
    The tf.tpu.Topology object for the topology of the TPU cluster.

  Raises:
    AssertionError: If `cluster_resolver` is not a `TPUClusterResolver`.
    RuntimeError: If no TPU devices found for eager execution or if run in a
      tf.function.
  """
  job = None
  if cluster_resolver is None:
    # If no cluster resolver is specified, and running eagerly, execute the init
    # ops in the current device scope.
    if context.executing_eagerly():
      curr_device = device.DeviceSpec.from_string(context.context().device_name)
      if curr_device.job is not None:
        job = "{}/replica:0/task:0".format(curr_device.job)

    cluster_resolver = TPUClusterResolver("")
  assert isinstance(cluster_resolver, TPUClusterResolver)

  tpu_name = compat.as_text(cluster_resolver._tpu)  # pylint: disable=protected-access
  if tpu_name in _INITIALIZED_TPU_SYSTEMS:
    # Re-initialization is allowed but is only warned about, not prevented.
    logging.warning(
        "TPU system %s has already been initialized. "
        "Reinitializing the TPU can cause previously created "
        "variables on TPU to be lost.", tpu_name)

  logging.info("Initializing the TPU system: %s", tpu_name)

  if context.executing_eagerly():
    # This function looks as it is for the following non-intuitive reasons.
    # tpu.initialize_system creates a dummy op whose sole purpose is to trigger
    # DistributedTPURewritePass. This pass actually adds real ops that
    # initialize the TPU system. Thus, we can't simply run tpu.initialize_system
    # eagerly. We need to wrap it in defun and trigger the rewrite passes on it.
    if tpu_name not in _LOCAL_MASTERS:
      # Explicitly place the tpu.initialize_system in the first worker to
      # avoid the output node match multiple devices error.
      job = "{}/replica:0/task:0".format(cluster_resolver.get_job_name())

    # `_tpu_init_fn` closes over `job`, so `job` must be final before the
    # function is defined and called.
    @function.defun
    def _tpu_init_fn():
      # In TF1, we usually close chips when compilation fails to clear the data
      # in infeed. In TF2, we don't need to do this because infeed is no longer
      # used, so user can recover from TPU compilation failures more smoothly.
      return tpu.initialize_system(
          job=job, compilation_failure_closes_chips=False)

    # The TPU_SYSTEM device must match the device used in tpu.initialize_system
    # exactly, otherwise you can get errors if there are multiple TPU_SYSTEM
    # devices available.
    with ops.device(tpu._tpu_system_device_name(job)):  # pylint: disable=protected-access
      output = _tpu_init_fn()

    # Clear out the eager context caches since the memory is invalid now.
    logging.info("Clearing out eager caches")
    context.context()._clear_caches()  # pylint: disable=protected-access

    serialized_topology = output.numpy()

    # TODO(b/134094971): Remove this when lazy tensor copy in multi-device
    # function has been implemented.
    context.context().mirroring_policy = context.MIRRORING_ALL
  elif not ops.executing_eagerly_outside_functions():
    # Graph mode: run the initialization op in a throwaway graph and session
    # that targets the resolver's master.
    master = cluster_resolver.master()
    cluster_spec = cluster_resolver.cluster_spec()

    session_config = config_pb2.ConfigProto(allow_soft_placement=True)
    if cluster_spec:
      session_config.cluster_def.CopyFrom(cluster_spec.as_cluster_def())

    with ops.Graph().as_default():
      with session_lib.Session(config=session_config, target=master) as sess:
        serialized_topology = sess.run(tpu.initialize_system())
  else:
    # Not eager, but eager outside functions: we are inside a tf.function.
    raise RuntimeError("initialize_tpu_system is not supported within "
                       "tf.functions.")

  logging.info("Finished initializing TPU system.")
  tpu_topology = topology.Topology(serialized=serialized_topology)
  # Record the topology so that later calls can detect re-initialization.
  _INITIALIZED_TPU_SYSTEMS[tpu_name] = tpu_topology

  return tpu_topology
def __exit__(self, error_type, unused_value, unused_traceback):
  """Registers stateful ops created inside the context as layer updates.

  Ops added to `self._graph` since `self._num_operations` was recorded are
  inspected. Those that are stateful, not inside a while loop, not already
  consumed by the outputs or by another tracked op, and not already added
  explicitly are passed to `self.layer.add_update`.

  Args:
    error_type: Exception type raised inside the context, if any. When truthy
      nothing is tracked and the exception is left to propagate.
    unused_value: Unused.
    unused_traceback: Unused.
  """
  # Allow errors that occurred inside this context manager to pass through
  # normally.
  if error_type:
    return

  # Only run in V2 Function mode: not eager, but eager outside functions.
  in_v2_function_mode = (not context.executing_eagerly() and
                         ops.executing_eagerly_outside_functions())
  if not in_v2_function_mode:
    return

  # Only auto-track updates when the Keras Graph is the only one used.
  graph = self._graph
  if not (graph is ops.get_default_graph() and graph.name == 'keras_graph'):
    return

  # pylint: disable=protected-access
  registered_ops = graph._registered_ops

  def _needs_tracking(op):
    """Tells whether `op` is a stateful op that should become an update."""
    # While loop is not supported in general for automatic control
    # dependencies.
    if control_flow_util.IsInWhileLoop(op):
      return False
    stateful = (op.type not in registered_ops or
                auto_control_deps.op_is_stateful(registered_ops[op.type]))
    # Ignore ReadVariableOps as they are not needed to be run separately.
    # This ensures existing Layers don't get extra updates.
    return stateful and op.type != 'ReadVariableOp'

  created_ops = graph.get_operations()[self._num_operations:]
  candidate_ops = {op for op in created_ops if _needs_tracking(op)}

  explicit_updates = {
      update for update in self.layer._unfiltered_updates
      if not isinstance(update, tuple)
  }
  # pylint: enable=protected-access

  # Don't add updates that will already be run by virtue of being consumed by
  # other stateful ops or by the Layer's outputs. This ensures that existing
  # Layers like `BatchNormalization` continue to return the same values for
  # `.update` calls.
  targets = candidate_ops.union(
      set(nest.flatten(self.outputs)), explicit_updates)
  unconsumed_ops = {
      op for op in candidate_ops
      if not (targets - {op}).intersection(
          tf_utils.get_reachable_from_inputs(op))
  }

  # Don't double-track updates added via explicitly calling `add_update`.
  # Also don't double-track updates already tracked in sublayers.
  pending_updates = unconsumed_ops - explicit_updates

  # Decide whether to track as input-conditional or unconditional.
  input_reachable_ops = tf_utils.get_reachable_from_inputs(
      self.inputs, targets=pending_updates)
  unconditional_updates = pending_updates - input_reachable_ops
  conditional_updates = pending_updates - unconditional_updates

  if unconditional_updates:
    self.layer.add_update(list(unconditional_updates))
  if conditional_updates:
    self.layer.add_update(list(conditional_updates), inputs=self.inputs)
def add_weight(self,
               name,
               shape,
               dtype=None,
               initializer=None,
               regularizer=None,
               trainable=None,
               constraint=None,
               use_resource=None,
               synchronization=vs.VariableSynchronization.AUTO,
               aggregation=vs.VariableAggregation.NONE,
               partitioner=None,
               **kwargs):
  """Adds a new variable to the layer, or gets an existing one; returns it.

  Arguments:
    name: variable name.
    shape: variable shape.
    dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
    initializer: initializer instance (callable).
    regularizer: regularizer instance (callable).
    trainable: whether the variable should be part of the layer's
      "trainable_variables" (e.g. variables, biases)
      or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
      Note, if the current variable scope is marked as non-trainable
      then this parameter is ignored and any added variables are also
      marked as non-trainable. `trainable` defaults to `True` unless
      `synchronization` is set to `ON_READ`.
    constraint: constraint instance (callable).
    use_resource: Whether to use `ResourceVariable`.
    synchronization: Indicates when a distributed variable will be
      aggregated. Accepted values are constants defined in the class
      `tf.VariableSynchronization`. By default the synchronization is set to
      `AUTO` and the current `DistributionStrategy` chooses
      when to synchronize. If `synchronization` is set to `ON_READ`,
      `trainable` must not be set to `True`.
    aggregation: Indicates how a distributed variable will be aggregated.
      Accepted values are constants defined in the class
      `tf.VariableAggregation`.
    partitioner: (optional) partitioner instance (callable). If
      provided, when the requested variable is created it will be split
      into multiple partitions according to `partitioner`. In this case,
      an instance of `PartitionedVariable` is returned. Available
      partitioners include `tf.fixed_size_partitioner` and
      `tf.variable_axis_size_partitioner`. For more details, see the
      documentation of `tf.get_variable` and the "Variable Partitioners
      and Sharding" section of the API guide.
    **kwargs: Additional keyword arguments. Only `experimental_autocast` is
      accepted; it is forwarded to the parent implementation.

  Returns:
    The created variable. Usually either a `Variable` or `ResourceVariable`
    instance. If `partitioner` is not `None`, a `PartitionedVariable`
    instance is returned.

  Raises:
    RuntimeError: If called with partitioned variable regularization and
      eager execution is enabled.
    TypeError: If an unsupported keyword argument is passed.
    ValueError: When trainable has been set to True with synchronization
      set as `ON_READ`.
  """
  for kwarg in kwargs:
    if kwarg != 'experimental_autocast':
      # Build one message string; passing the name as a second positional
      # argument would make the error render as a tuple repr.
      raise TypeError('Unknown keyword argument: {}'.format(kwarg))

  if self._keras_style:
    # NOTE(review): the caller's `synchronization` / `aggregation` are not
    # forwarded on this path; the defaults are always used. Confirm this is
    # intentional.
    return super(Layer, self).add_weight(
        name=name,
        shape=shape,
        dtype=dtype,
        initializer=initializer,
        regularizer=regularizer,
        trainable=trainable,
        constraint=constraint,
        use_resource=use_resource,
        synchronization=vs.VariableSynchronization.AUTO,
        aggregation=vs.VariableAggregation.NONE,
        partitioner=partitioner,
        **kwargs)

  if synchronization == vs.VariableSynchronization.ON_READ:
    if trainable:
      raise ValueError(
          'Synchronization value can be set to '
          'VariableSynchronization.ON_READ only for non-trainable variables. '
          'You have specified trainable=True and '
          'synchronization=VariableSynchronization.ON_READ.')
    else:
      # Set trainable to be false when variable is to be synced on read.
      trainable = False
  elif trainable is None:
    trainable = True

  def _should_add_regularizer(variable, existing_variable_set):
    """Returns True if no part of `variable` existed before this call."""
    if isinstance(variable, tf_variables.PartitionedVariable):
      return not any(var in existing_variable_set for var in variable)
    return variable not in existing_variable_set

  init_graph = None
  if not context.executing_eagerly():
    default_graph = ops.get_default_graph()
    if default_graph.building_function:
      with ops.init_scope():
        # Retrieve the variables from the graph into which variables
        # will be lifted; if initialization ops will be lifted into
        # the eager context, then there is nothing to retrieve, since variable
        # collections are not supported when eager execution is enabled.
        if not context.executing_eagerly():
          init_graph = ops.get_default_graph()
          existing_variables = set(tf_variables.global_variables())
    else:
      # Initialization ops will not be lifted out of the default graph.
      init_graph = default_graph
      existing_variables = set(tf_variables.global_variables())

  if dtype is None:
    dtype = self.dtype or dtypes.float32

  self._set_scope(None)
  reuse = self.built or self._reuse
  # Remember how many trainable weights existed so that weights added by this
  # call can be moved to the non-trainable list below if needed.
  prev_len_trainable = len(self._trainable_weights)
  with vs.variable_scope(
      self._scope, reuse=reuse, auxiliary_name_scope=False) as scope:
    self._current_scope = scope
    with ops.name_scope(self._name_scope()):
      use_resource = (use_resource or
                      self._use_resource_variables or
                      scope.use_resource)
      if initializer is None:
        initializer = scope.initializer
      variable = super(Layer, self).add_weight(
          name,
          shape,
          dtype=dtypes.as_dtype(dtype),
          initializer=initializer,
          trainable=trainable,
          constraint=constraint,
          partitioner=partitioner,
          use_resource=use_resource,
          synchronization=synchronization,
          aggregation=aggregation,
          getter=vs.get_variable,
          **kwargs)

      if regularizer:
        # `existing_variables` is only bound on the graph paths above; the
        # first operand short-circuits when running eagerly outside functions.
        if (ops.executing_eagerly_outside_functions() or
            _should_add_regularizer(variable, existing_variables)):
          self._handle_weight_regularization(name, variable, regularizer)

      if init_graph is not None:
        # Handle edge case where a custom getter has overridden `trainable`.
        # There is one known occurrence of this, in unit test
        # testBasicRNNCellNotTrainable in
        # contrib.rnn.python.kernel_tests.core_rnn_cell_test
        with init_graph.as_default():
          trainable_variables = tf_variables.trainable_variables()
        if (trainable and self.trainable and
            variable not in trainable_variables):
          # A custom getter / variable scope overrode the trainable flag.
          extra_trainable_vars = self._trainable_weights[prev_len_trainable:]
          self._trainable_weights = self._trainable_weights[
              :prev_len_trainable]
          self._non_trainable_weights += extra_trainable_vars
  return variable
def __call__(self, inputs, *args, **kwargs):
  """Wraps `call`, applying pre- and post-processing steps.

  Args:
    inputs: input tensor(s).
    *args: additional positional arguments to be passed to `self.call`.
    **kwargs: additional keyword arguments to be passed to `self.call`.
      **Note**: kwarg `scope` is reserved for use by the layer.

  Returns:
    Output tensor(s).

  Note:
    - If the layer's `call` method takes a `scope` keyword argument,
      this argument will be automatically set to the current variable scope.
    - If the layer's `call` method takes a `mask` argument (as some Keras
      layers do), its default value will be set to the mask generated
      for `inputs` by the previous layer (if `input` did come from
      a layer that generated a corresponding mask, i.e. if it came from
      a Keras layer with masking support).

  Raises:
    ValueError: if the layer's `call` method returns None (an invalid value),
      or if `scope` is passed while keras style layers are enabled.
  """
  scope = kwargs.pop('scope', None)

  if self._keras_style:
    if scope is not None:
      raise ValueError(
          'scope argument not allowed when keras style layers are enabled, '
          'but saw: {}'.format(scope))
    return super(Layer, self).__call__(inputs, *args, **kwargs)

  self._set_scope(scope)

  if not self.built:
    scope_context_manager = vs.variable_scope(
        self._scope, reuse=self._reuse, auxiliary_name_scope=False)
  else:
    # Some classes which inherit from Layer do not use its constructor, so
    # the cached scope attribute may be missing altogether.
    scope_context_manager = getattr(
        self, '_always_reuse_variable_scope', None)
    if scope_context_manager is None:
      # From this point we will always set reuse=True, so create a "final"
      # variable scope with this setting. We avoid re-creating variable scopes
      # after this point as an optimization.
      scope_context_manager = vs.variable_scope(
          self._scope, reuse=True, auxiliary_name_scope=False)

      # Do not cache variable scopes if Eager mode is enabled. If Eager mode
      # is enabled then we don't want to reuse scopes because the cached scope
      # might be from a FuncGraph or Eager scope we are no longer in.
      if not ops.executing_eagerly_outside_functions():
        self._always_reuse_variable_scope = scope_context_manager

  with scope_context_manager as scope:
    self._current_scope = scope

    # Inspect the signature of `call` only once and remember the outcome.
    if not hasattr(self, '_call_has_scope_arg'):
      self._call_fn_args = variable_scope_shim.fn_args(self.call)
      self._call_has_scope_arg = 'scope' in self._call_fn_args
    if self._call_has_scope_arg:
      kwargs['scope'] = scope

    # Actually call layer
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)

  if not context.executing_eagerly():
    # Update global default collections.
    _add_elements_to_collection(self.updates, ops.GraphKeys.UPDATE_OPS)
  return outputs
def _instrument_symbolic_tensors(tensors, op_name, tfdbg_context_id):
  """Add debugging instrumentation for symbolic (i.e., non-eager) tensors.

  The detailed fashion in which the tensors are instrumented is determined
  by the tensor_debug_mode configured for the currently enabled dumping
  callback.

  Args:
    tensors: A tuple of Tensors to instrument. It is assumed that their
      ordering corresponds to the ordering of output tensors of an original
      op. Output slot indices (0-based) will be generated based on the
      ordering.
    op_name: Name of the op that emits the Tensors.
    tfdbg_context_id: A unique ID for the context that the op belongs to
      (e.g., a graph).

  Returns:
    Non-eager Tensors that override the `tensors` as the output of the op
    that originally generated `tensors`. In some cases (e.g., non-V1 graph
    mode), this may be `None`, as the instrumentation can simply rely on
    automatic control dependencies (see `auto_control_deps.py`) instead of
    tensor overriding.

  Raises:
    NotImplementedError: If the configured tensor debug mode is neither
      `NO_TENSOR` nor `FULL_TENSOR`.
  """
  tensor_debug_mode = _state.config.tensor_debug_mode
  debug_urls = ["file://%s" % _state.config.dump_root]
  is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
  instrumented_tensors = [] if is_v1_graph_mode else None

  trace_values = (
      tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR)
  if (not trace_values and
      tensor_debug_mode != debug_event_pb2.TensorDebugMode.NO_TENSOR):
    raise NotImplementedError(
        "Symbolic tensor instrumentation is not implemented for debug mode %s" %
        _state.config.tensor_debug_mode)

  for output_slot, tensor in enumerate(tensors):
    with ops.colocate_with(None, ignore_existing=True):
      if trace_values:
        traced_input = tensor
      else:
        # Use an empty (shape=[0]) float32 tensor for the NO_TENSOR mode
        # as a low-overhead placeholder, since no actual tensor value is
        # traced.
        traced_input = constant_op.constant([], dtype=dtypes.float32)

      # Except in V1 graph mode + control flow, debug_identity_v2 trigger auto
      # control dependency because it's a stateful op.
      debug_tensor = gen_debug_ops.debug_identity_v2(
          traced_input,
          tfdbg_context_id=tfdbg_context_id,
          op_name=op_name,
          output_slot=output_slot,
          tensor_debug_mode=_state.config.tensor_debug_mode,
          debug_urls=debug_urls)

      if not is_v1_graph_mode:
        continue

      if trace_values:
        instrumented_tensors.append(debug_tensor)
      else:
        # TODO(cais): Evaluate performance optimization options. For the
        # `NO_TENSOR` debug mode, an alternative is to add `debug_tensor` as a
        # control dependency of `tensor.op` without an additional identity op.
        identity = array_ops.identity(tensor)
        identity.op._add_control_input(  # pylint: disable=protected-access
            debug_tensor.op)
        instrumented_tensors.append(identity)
  return instrumented_tensors
def eager_py_func(func, inp, Tout, name=None):
  """Wraps a python function into a TensorFlow op that executes it eagerly.

  This function allows expressing computations in a TensorFlow graph as
  Python functions. In particular, it wraps a Python function `func`
  in a once-differentiable TensorFlow operation that executes it with eager
  execution enabled. As a consequence, `tf.py_function` makes it
  possible to express control flow using Python constructs (`if`, `while`,
  `for`, etc.), instead of TensorFlow control flow constructs (`tf.cond`,
  `tf.while_loop`). For example, you might use `tf.py_function` to
  implement the log huber function:

  ```python
  def log_huber(x, m):
    if tf.abs(x) <= m:
      return x**2
    else:
      return m**2 * (1 - 2 * tf.math.log(m) + tf.math.log(x**2))

  x = tf.compat.v1.placeholder(tf.float32)
  m = tf.compat.v1.placeholder(tf.float32)

  y = tf.py_function(func=log_huber, inp=[x, m], Tout=tf.float32)
  dy_dx = tf.gradients(y, x)[0]

  with tf.compat.v1.Session() as sess:
    # The session executes `log_huber` eagerly. Given the feed values below,
    # it will take the first branch, so `y` evaluates to 1.0 and
    # `dy_dx` evaluates to 2.0.
    y, dy_dx = sess.run([y, dy_dx], feed_dict={x: 1.0, m: 2.0})
  ```

  You can also use `tf.py_function` to debug your models at runtime
  using Python tools, i.e., you can isolate portions of your code that
  you want to debug, wrap them in Python functions and insert `pdb`
  tracepoints or print statements as desired, and wrap those functions in
  `tf.py_function`.

  For more information on eager execution, see the
  [Eager guide](https://tensorflow.org/guide/eager).

  `tf.py_function` is similar in spirit to `tf.compat.v1.py_func`, but unlike
  the latter, the former lets you use TensorFlow operations in the wrapped
  Python function. In particular, while `tf.compat.v1.py_func` only runs on
  CPUs and wraps functions that take NumPy arrays as inputs and return NumPy
  arrays as outputs, `tf.py_function` can be placed on GPUs and wraps
  functions that take Tensors as inputs, execute TensorFlow operations in
  their bodies, and return Tensors as outputs.

  Like `tf.compat.v1.py_func`, `tf.py_function` has the following limitations
  with respect to serialization and distribution:

  * The body of the function (i.e. `func`) will not be serialized in a
    `GraphDef`. Therefore, you should not use this function if you need to
    serialize your model and restore it in a different environment.

  * The operation must run in the same address space as the Python program
    that calls `tf.py_function()`. If you are using distributed
    TensorFlow, you must run a `tf.distribute.Server` in the same process as
    the program that calls `tf.py_function()` and you must pin the created
    operation to a device in that server (e.g. using `with tf.device():`).

  Args:
    func: A Python function that accepts `inp` as arguments, and returns a
      value (or list of values) whose type is described by `Tout`.
    inp: Input arguments for `func`. A list whose elements are `Tensor`s or
      `CompositeTensors` (such as `tf.RaggedTensor`); or a single `Tensor` or
      `CompositeTensor`.
    Tout: The type(s) of the value(s) returned by `func`. One of the
      following.

      * If `func` returns a `Tensor` (or a value that can be converted to a
        Tensor): the `tf.DType` for that value.
      * If `func` returns a `CompositeTensor`: The `tf.TypeSpec` for that
        value.
      * If `func` returns `None`: the empty list (`[]`).
      * If `func` returns a list of `Tensor` and `CompositeTensor` values:
        a corresponding list of `tf.DType`s and `tf.TypeSpec`s for each
        value.
    name: A name for the operation (optional).

  Returns:
    The value(s) computed by `func`: a `Tensor`, `CompositeTensor`, or list of
    `Tensor` and `CompositeTensor`; or an empty list if `func` returns `None`.
  """

  def _build_op():
    """Creates the op; identical regardless of the surrounding device scope."""
    return _internal_py_func(
        func=func, inp=inp, Tout=Tout, use_eager_py_func=True, name=name)

  if not ops.executing_eagerly_outside_functions():
    return _build_op()

  # When eager outside functions, create the op under the device scope
  # returned by the context's `host_address_space()`.
  with ops.device(context.context().host_address_space()):
    return _build_op()
def py_func_common(func, inp, Tout, stateful=True, name=None):
  """Wraps a python function and uses it as a TensorFlow op.

  Given a python function `func`, which takes numpy arrays as its
  arguments and returns numpy arrays as its outputs, wrap this function as an
  operation in a TensorFlow graph. The following snippet constructs a simple
  TensorFlow graph that invokes the `np.sinh()` NumPy function as a operation
  in the graph:

  ```python
  def my_func(x):
    # x will be a numpy array with the contents of the placeholder below
    return np.sinh(x)
  input = tf.compat.v1.placeholder(tf.float32)
  y = tf.compat.v1.py_func(my_func, [input], tf.float32)
  ```

  **N.B.** The `tf.compat.v1.py_func()` operation has the following known
  limitations:

  * The body of the function (i.e. `func`) will not be serialized in a
    `GraphDef`. Therefore, you should not use this function if you need to
    serialize your model and restore it in a different environment.

  * The operation must run in the same address space as the Python program
    that calls `tf.compat.v1.py_func()`. If you are using distributed
    TensorFlow, you must run a `tf.distribute.Server` in the same process as
    the program that calls `tf.compat.v1.py_func()` and you must pin the
    created operation to a device in that server (e.g. using
    `with tf.device():`).

  Note: It produces tensors of unknown shape and rank as shape inference
    does not work on arbitrary Python code.
    If you need the shape, you need to set it based on statically
    available information.

    E.g.
    ```python
    import tensorflow as tf
    import numpy as np

    def make_synthetic_data(i):
        return np.cast[np.uint8](i) * np.ones([20,256,256,3],
                dtype=np.float32) / 10.

    def preprocess_fn(i):
        ones = tf.py_function(make_synthetic_data,[i],tf.float32)
        ones.set_shape(tf.TensorShape([None, None, None, None]))
        ones = tf.image.resize(ones, [224,224])
        return ones

    ds = tf.data.Dataset.range(10)
    ds = ds.map(preprocess_fn)
    ```

  Args:
    func: A Python function, which accepts `ndarray` objects as arguments and
      returns a list of `ndarray` objects (or a single `ndarray`). This
      function must accept as many arguments as there are tensors in `inp`,
      and these argument types will match the corresponding `tf.Tensor`
      objects in `inp`. The returns `ndarray`s must match the number and
      types defined `Tout`.
      Important Note: Input and output numpy `ndarray`s of `func` are not
        guaranteed to be copies. In some cases their underlying memory will
        be shared with the corresponding TensorFlow tensors.
        In-place modification or storing `func` input or return values in
        python datastructures without explicit (np.)copy
        can have non-deterministic consequences.
    inp: A list of `Tensor` objects.
    Tout: A list or tuple of tensorflow data types or a single tensorflow data
      type if there is only one, indicating what `func` returns.
    stateful: (Boolean.) If True, the function should be considered stateful.
      If a function is stateless, when given the same input it will return
      the same output and have no observable side effects. Optimizations such
      as common subexpression elimination are only performed on stateless
      operations.
    name: A name for the operation (optional).

  Returns:
    A list of `Tensor` or a single `Tensor` which `func` computes.

  @compatibility(TF2)

  This name was deprecated and removed in TF2, but `tf.numpy_function` is a
  near-exact replacement, just drop the `stateful` argument (all
  `tf.numpy_function` calls are considered stateful). It is compatible with
  eager execution and `tf.function`.

  `tf.py_function` is a close but not an exact replacement, passing TensorFlow
  tensors to the wrapped function instead of NumPy arrays, which provides
  gradients and can take advantage of accelerators.

  Before:

  >>> def fn_using_numpy(x):
  ...   x[0] = 0.
  ...   return x
  >>> tf.compat.v1.py_func(fn_using_numpy, inp=[tf.constant([1., 2.])],
  ...     Tout=tf.float32, stateful=False)
  <tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 2.], dtype=float32)>

  After:

  >>> tf.numpy_function(fn_using_numpy, inp=[tf.constant([1., 2.])],
  ...     Tout=tf.float32)
  <tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 2.], dtype=float32)>

  @end_compatibility
  """
  if context.executing_eagerly():
    # Eager execution: call `func` directly on NumPy copies of the inputs and
    # convert whatever it returns back to tensors.
    numpy_args = [np.array(arg) for arg in inp]
    flat_outputs = nest.flatten(func(*numpy_args))
    tensors = [
        None if value is None else ops.convert_to_tensor(value)
        for value in flat_outputs
    ]
    if len(tensors) == 1:
      # Mimic the automatic unwrapping in graph-mode py_func
      return tensors[0]
    return tensors

  def _build_op():
    """Creates the op; identical regardless of the surrounding device scope."""
    return _internal_py_func(
        func=func,
        inp=inp,
        Tout=Tout,
        stateful=stateful,
        use_eager_py_func=False,
        name=name)

  if not ops.executing_eagerly_outside_functions():
    return _build_op()

  # When eager outside functions, create the op under the device scope
  # returned by the context's `host_address_space()`.
  with ops.device(context.context().host_address_space()):
    return _build_op()
def _initialize_local(self, cluster_resolver, devices=None):
  """Configures this object for single-worker (local) training.

  Args:
    cluster_resolver: Resolver whose `rpc_layer` is recorded on the object; it
      is also handed to `_initialize_local_devices` when `devices` is empty.
    devices: Optional sequence of local devices. When given, the device type
      is chosen by looking for "GPU", then "TPU", in the first entry, falling
      back to "CPU".
  """
  self._is_chief = True
  self._num_workers = 1

  if ops.executing_eagerly_outside_functions():
    try:
      context.context().configure_collective_ops(
          scoped_allocator_enabled_ops=("CollectiveReduce",))
    except RuntimeError:
      # Best effort only: a RuntimeError here is logged and then ignored.
      logging.warning(
          "Collective ops is not configured at program startup. "
          "Some performance features may not be enabled.")
    # Set whether or not the configuration call above succeeded.
    self._collective_ops_configured = True

  if not devices:
    local_devices, local_device_type = self._initialize_local_devices(
        cluster_resolver, worker_device="")
  else:
    local_devices = devices
    first_device = devices[0]
    # Accelerator kinds are probed in priority order; CPU is the fallback.
    local_device_type = next(
        (kind for kind in ("GPU", "TPU") if kind in first_device), "CPU")

  worker_device = device_util.canonicalize("/device:CPU:0")
  self._worker_device = worker_device
  self._host_input_device = numpy_dataset.SingleDevice(worker_device)

  collective_keys = cross_device_utils.CollectiveKeys(
      group_key_start=1 + self._collective_key_base)
  self._collective_keys = collective_keys
  self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
      devices=local_devices,
      group_size=len(local_devices),
      collective_keys=collective_keys)
  # CrossDeviceOps used for per-host tensors.
  self._host_cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
      devices=[worker_device],
      group_size=self._num_workers,
      collective_keys=collective_keys)

  super(CollectiveAllReduceExtended,
        self)._initialize_single_worker(local_devices)

  self._cluster_spec = self._task_type = self._task_id = None
  self._id_in_cluster = 0

  # Marks whether we run with a standalone client or as an independent
  # worker. With a standalone client the strategy object starts out as a
  # local strategy and is later turned into a multi-worker strategy through
  # the configure call.
  self._local_or_standalone_client_mode = True

  # Kept around (device count, device type, rpc layer) for the configure
  # method.
  self._num_devices_per_worker = len(local_devices)
  self._local_device_type = local_device_type
  self._rpc_layer = cluster_resolver.rpc_layer

  self._warn_nccl_no_gpu()

  logging.info(
      "Single-worker MultiWorkerMirroredStrategy with local_devices "
      "= %r, communication = %s", local_devices,
      self._communication_options.implementation)
def maybe_init_scope():
  """Yields once, inside `ops.init_scope()` unless already eager.

  When `ops.executing_eagerly_outside_functions()` is true the single yield
  happens directly; otherwise it happens within an `ops.init_scope()` block.

  NOTE(review): this is a bare generator here; presumably it is wrapped by a
  context-manager decorator outside the visible text -- confirm.
  """
  if not ops.executing_eagerly_outside_functions():
    with ops.init_scope():
      yield
    return
  yield
def _check_layer_class(self, layer):
  """Asserts that `layer` has the base class matching the execution mode.

  Outside eager-outside-functions mode the layer must be a
  `base_layer_v1.Layer`. Otherwise it must be a `base_layer.Layer` and must
  not be a `base_layer_v1.Layer`.

  Args:
    layer: The object whose class is checked.
  """
  if not ops.executing_eagerly_outside_functions():
    self.assertIsInstance(layer, base_layer_v1.Layer)
    return
  self.assertIsInstance(layer, base_layer.Layer)
  self.assertNotIsInstance(layer, base_layer_v1.Layer)
def save_model(model,
               filepath,
               overwrite=True,
               include_optimizer=True,
               save_format=None):
  """Saves a model as a TensorFlow SavedModel or HDF5 file.

  The saved model contains:
      - the model's configuration (topology)
      - the model's weights
      - the model's optimizer's state (if any)

  Thus the saved model can be reinstantiated in the exact same state, without
  any of the code used for model definition or training.

  Note: after the `save_format == 'tf'` checks below, `save_format` is
  currently forced to `'h5'`, so every call that does not raise ends up in the
  HDF5 path.

  Arguments:
      model: Keras model instance to be saved.
      filepath: One of the following:
        - String, path where to save the model
        - `h5py.File` object where to save the model
      overwrite: Whether we should overwrite any existing model at the target
        location, or instead ask the user with a manual prompt.
      include_optimizer: If True, save optimizer's state together.
      save_format: Either 'tf' or 'h5', indicating whether to save the model
        to Tensorflow SavedModel or HDF5. The 'tf' option is currently
        disabled, and will be enabled when Keras SavedModel export is no
        longer experimental. (The experimental function is
        tf.keras.experimental.export_saved_model).

  Raises:
      ImportError: If save format is hdf5, and h5py is not available.
      NotImplementedError: If `save_format` is 'tf' while TF2 behavior is
        disabled and execution is not eager outside functions; if
        `save_format` is 'tf' while `_KERAS_SAVED_MODEL_STILL_EXPERIMENTAL` is
        set; or if the model is neither a graph network nor a `Sequential`
        model when saving to HDF5.
  """
  from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top

  if (not tf2.enabled() and
      not ops.executing_eagerly_outside_functions() and
      save_format == 'tf'):
    # The first literal ends with a space so the concatenated message reads
    # "TensorFlow 1.X graph mode" rather than "TensorFlow 1.Xgraph mode".
    raise NotImplementedError(
        'Saving the model as SavedModel is not supported in TensorFlow 1.X '
        'graph mode. Please enable eager execution or use the "h5" save format.'
    )

  if _KERAS_SAVED_MODEL_STILL_EXPERIMENTAL and save_format == 'tf':
    raise NotImplementedError(
        'Saving the model as SavedModel is still in experimental stages. '
        'Please use tf.keras.experimental.export_saved_model, or use '
        'save_format="h5" to save to HDF5.')

  # TODO(kathywu): Remove this when Keras SavedModel is not experimental.
  save_format = 'h5'

  # With `save_format` forced above, the first operand is always true; the
  # remaining operands are kept for when the override is removed.
  if (save_format == 'h5' or
      (h5py is not None and isinstance(filepath, h5py.File)) or
      os.path.splitext(filepath)[1] in _HDF5_EXTENSIONS):
    # TODO(b/130258301): add utility method for detecting model type.
    if (not model._is_graph_network and  # pylint:disable=protected-access
        not isinstance(model, sequential.Sequential)):
      raise NotImplementedError(
          'Saving the model to HDF5 format requires the model to be a '
          'Functional model or a Sequential model. It does not work for '
          'subclassed models, because such models are defined via the body of '
          'a Python method, which isn\'t safely serializable. Consider saving '
          'to the Tensorflow SavedModel format (by setting save_format="tf") '
          'or using `save_weights`.')
    hdf5_format.save_model_to_hdf5(
        model, filepath, overwrite, include_optimizer)
    return
def _build_graph_network_for_inferred_shape(self,
                                            input_shape,
                                            input_dtype=None):
  """Rebuilds the model as a graph network when a novel input shape arrives.

  Nothing happens when `input_shape` is None, the model has no layers, TF2
  behavior or eager-outside-functions execution is off, an explicit input
  shape was provided, or the legacy deferred behavior is active.

  Args:
    input_shape: Shape of the incoming input; converted to a tuple.
    input_dtype: Optional dtype passed to the symbolic `Input` that is created.

  Raises:
    ValueError: If a layer call returns anything other than a single output
      (after flattening).
  """
  if input_shape is None or not self.layers:
    return
  if not (tf2.enabled() and ops.executing_eagerly_outside_functions()):
    # This behavior is disabled in V1 or when eager execution is disabled.
    return
  if self._has_explicit_input_shape or self._use_legacy_deferred_behavior:
    return

  # Work out whether this shape is novel, i.e. whether a rebuild is needed.
  input_shape = tuple(input_shape)
  known_shape = self._inferred_input_shape
  new_shape = (
      input_shape if known_shape is None else relax_input_shape(
          known_shape, input_shape))
  shape_is_novel = new_shape is not None and new_shape != known_shape
  if not shape_is_novel:
    return

  # A novel shape was received, so the model has to be rebuilt. If we happen
  # to be inside a graph function, step out of it first.
  with ops.init_scope():
    inputs = input_layer.Input(
        batch_shape=new_shape,
        dtype=input_dtype,
        name=self.layers[0].name + '_input')
    current_input = inputs
    fresh_nodes = set()
    for layer in self.layers:
      # Drop the nodes an earlier run of this method created. That avoids
      # node build-up and keeps e.g. `layer.output` wired to `model.inputs`
      # (which matters for feature extraction). `layer._inbound_nodes` is not
      # simply reset to `[]` so that shared layers added to Sequential models
      # keep working (technically illegal per the `add()` docstring, but not
      # previously disabled).
      clear_previously_created_nodes(layer, self._created_nodes)
      try:
        # Calling the layer creates the Functional API connection.
        current_output = layer(current_input)
      except:  # pylint:disable=bare-except
        # A Functional API call can fail for several reasons:
        # 1) The layer is buggy. Failing on the first call with concrete
        #    data, rather than on this symbolic call, is easier to debug.
        # 2) The layer is dynamic (graph-incompatible) and does not override
        #    `compute_output_shape`, so no graph network can be built.
        # 3) The layer is otherwise incompatible with the Functional API
        #    (e.g. some probabilistic layers that rely on hacks and do not
        #    return tensors).
        # In every case we avoid creating a graph network (or simply cannot).
        self._use_legacy_deferred_behavior = True
        return
      if len(nest.flatten(current_output)) != 1:
        raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
      # Record the nodes that the call above just created.
      track_nodes_created_by_last_call(layer, fresh_nodes)
      current_input = current_output
      outputs = current_output
    self._created_nodes = fresh_nodes
    try:
      # Initialize a graph Network. For a stack of valid Keras layers this
      # never fails. Some user layers are fundamentally incompatible with the
      # Functional API and do not return tensors; for those we fall back to
      # the legacy deferred behavior.
      # TODO(fchollet): consider raising here, as we should not be
      # supporting such layers.
      self._init_graph_network(inputs, outputs)
      self._graph_initialized = True
    except:  # pylint:disable=bare-except
      self._use_legacy_deferred_behavior = True
  self._inferred_input_shape = new_shape