def container(self, container_name):
    """Scope a resource container over both this graph and the init graph.

    Overridden from `tf.Graph`: the container is written to this graph and to
    whichever graph is the default inside `ops.init_scope()`, so that (per the
    original author's note) the setting applies to created variables as well
    as to stateful ops. Both previous values are restored on exit, including
    when the body raises.

    Args:
        container_name: container name string.

    Yields:
        The container name now set on this graph.
    """
    # pylint: disable=protected-access
    def _set_init_container(value):
        # Write `value` onto the graph that is default inside init_scope.
        with ops.init_scope():
            ops.get_default_graph()._container = value

    saved_container = self._container
    with ops.init_scope():
        saved_init_container = ops.get_default_graph()._container
    try:
        self._container = container_name
        _set_init_container(container_name)
        yield self._container
    finally:
        self._container = saved_container
        _set_init_container(saved_init_container)
    # pylint: enable=protected-access
def _real_mirrored_creator(devices, *args, **kwargs):  # pylint: disable=g-missing-docstring
    # Creates one variable per entry of `devices` through `next_creator` and
    # returns them as a list in device order. Every replica after the first is
    # renamed after, and initialized from, the first one.
    replicas = []
    for replica_id, device in enumerate(devices):
        with ops.device(device):
            if replica_id > 0:
                # Derive a distinct name from the first replica's name. The
                # trailing "/" makes the name absolute, so the current name
                # scope is ignored.
                base_name = replicas[0].name.split(":")[0]
                kwargs["name"] = "%s/replica_%d/" % (base_name, replica_id)
                # Start this replica from the same value as the first one.
                if not (context.executing_eagerly() or ops.inside_function()):
                    def initial_value_fn(device=device):
                        with ops.device(device):
                            return array_ops.identity(replicas[0].initial_value)
                    kwargs["initial_value"] = initial_value_fn
                else:
                    with ops.init_scope():
                        kwargs["initial_value"] = array_ops.identity(
                            replicas[0].value())
            silent_placement = context.context().device_policy(
                context.DEVICE_PLACEMENT_SILENT)
            with silent_placement:
                created = next_creator(*args, **kwargs)
            assert not isinstance(created, values.TPUMirroredVariable)
            replicas.append(created)
    return replicas
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Add a consensus-style correction to the gradients, then apply them.

    Each variable must belong to `self.gan.d_vars()` or `self.gan.g_vars()`.
    A correction term `Jgrads` is built from the discriminator gradients
    according to `self.config.type` ('sga', 'magnitude', or anything else),
    scaled by `self._beta`, added to the incoming gradients, and the result is
    forwarded to the wrapped `self.optimizer`.

    Args:
        grads_and_vars: iterable of (gradient, variable) pairs.
        global_step: forwarded to `self.optimizer.apply_gradients`.
        name: forwarded to `self.optimizer.apply_gradients`.

    Returns:
        Whatever `self.optimizer.apply_gradients` returns.

    Raises:
        ValueError: if a variable is in neither `d_vars()` nor `g_vars()`.
    """
    # Materialize once: the pairs are traversed several times below, and a
    # one-shot iterator (e.g. a `zip` object) would be empty after the first.
    grads_and_vars = list(grads_and_vars)
    var_list = [v for _, v in grads_and_vars]
    all_grads = [g for g, _ in grads_and_vars]

    d_vars = []
    g_vars = []
    for _, var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars.append(var)
        elif var in self.gan.g_vars():
            g_vars.append(var)
        else:
            # The original `raise("...")` raised a plain string, which is a
            # TypeError in Python 3 and hid this message.
            raise ValueError("Couldn't find var in g_vars or d_vars")

    with ops.init_scope():
        self.optimizer._create_slots(list(var_list))

    self._prepare()

    # NOTE(review): this slice assumes all discriminator pairs come first in
    # `grads_and_vars` - confirm against callers.
    d_grads = all_grads[:len(d_vars)]
    if self.config.type == 'sga':
        Jgrads = tf.gradients(
            d_grads, d_vars, grad_ys=d_grads, stop_gradients=d_vars
        ) + [tf.zeros_like(g) for g in g_vars]
    elif self.config.type == 'magnitude':
        consensus_reg = [tf.square(g) for g in d_grads if g is not None]
        Jgrads = tf.gradients(consensus_reg, d_vars) + [
            tf.zeros_like(g) for g in g_vars]
    else:
        consensus_reg = 0.5 * sum(
            tf.reduce_sum(tf.square(g)) for g in d_grads if g is not None
        )
        Jgrads = tf.gradients(
            consensus_reg, d_vars, stop_gradients=d_vars
        ) + [tf.zeros_like(g) for g in g_vars]

    # Keep the original gradient wherever no correction exists.
    new_grads = [
        g + jg * self._beta if jg is not None else g
        for g, jg in zip(all_grads, Jgrads)
    ]
    new_grads_and_vars = list(zip(new_grads, var_list))
    return self.optimizer.apply_gradients(
        new_grads_and_vars, global_step=global_step, name=name)
def _check_same_graph(self):
    """Verify that this module is not being connected to a second Graph.

    The first call records the current graph on `self._graph` and calls
    `self._set_module_info()`. Later calls compare the recorded graph with the
    current one, but only when the init-scope context is not eager.

    Raises:
        DifferentGraphError: if the module is connected to a different Graph
            than the one it was first used in.
    """
    with ops.init_scope():
        # Inside a defun we want to know where the function will be called,
        # not where it is being built - hence `init_scope`.
        current_graph = tf.get_default_graph()
        will_call_in_eager_context = tf.executing_eagerly()

    if self._graph is None:
        self._graph = current_graph
        self._set_module_info()

    # Graph identity is only checked in graph mode; per the original note,
    # eager mode has a single process-level context for all modules.
    if will_call_in_eager_context:
        return
    if self._graph != current_graph:
        raise DifferentGraphError("Cannot connect module to multiple Graphs.")
def apply_gradients(self, grads_and_vars, name=None):
    """Apply gradients to variables.

    This is the second part of `minimize()`. The pairs are passed through
    `_filter_grads`, hyperparameters and slots are created, and the update is
    dispatched through the replica context's `merge_call`.

    Args:
        grads_and_vars: List of (gradient, variable) pairs.
        name: Optional name for the returned operation; forwarded to
            `self._distributed_apply` as the `name` keyword.

    Returns:
        The result of `merge_call(self._distributed_apply, ...)`; per the
        original contract, an `Operation` that applies the gradients.

    Raises:
        TypeError: If `grads_and_vars` is malformed (per the original
            contract; presumably raised by a helper - confirm).
        ValueError: If none of the variables have gradients (same caveat).
    """
    filtered_pairs = _filter_grads(grads_and_vars)
    target_vars = [variable for (_, variable) in filtered_pairs]

    self._create_hypers()
    # Slots are created under init_scope; `_prepare` runs outside it.
    with ops.init_scope():
        self._create_slots(target_vars)
    self._prepare(target_vars)

    replica_context = distribute_ctx.get_replica_context()
    return replica_context.merge_call(
        self._distributed_apply,
        args=(filtered_pairs,),
        kwargs={"name": name})
def apply_gradients(self, grads_and_vars, name=None):
    """Apply gradients to variables.

    This is the second part of `minimize()`. Builds one update op per
    (gradient, variable) pair and combines them with `merge_update_step`
    together with `self.iterations`.

    Args:
        grads_and_vars: List of (gradient, variable) pairs as returned by
            `compute_gradients()`.
        name: Optional name scope for the created ops. Falls back to
            `self._name` through `ops.name_scope`.

    Returns:
        The result of `merge_update_step(update_ops, self.iterations)`.

    Raises:
        NotImplementedError: if a "variable" is an `ops.Tensor`.
        RuntimeError: if a sparse gradient targets a variable that has a
            constraint.
        TypeError: If `grads_and_vars` is malformed (per the original
            contract; presumably raised by `_filter_grads` - confirm).
        ValueError: If none of the variables have gradients (same caveat).
    """
    grads_and_vars = _filter_grads(grads_and_vars)
    var_list = [v for (_, v) in grads_and_vars]
    if distribution_strategy_context.has_distribution_strategy():
        # Pair the merged gradients with the variables again.
        grads_and_vars = zip(merge_grads(grads_and_vars), var_list)

    with ops.init_scope():
        self._prepare()
        self._create_slots(var_list)

    def update_grad_to_var(grad, var):
        """Apply gradient to variable."""
        if isinstance(var, ops.Tensor):
            raise NotImplementedError("Trying to update a Tensor ", var)
        if isinstance(grad, ops.IndexedSlices):
            if var.constraint is not None:
                raise RuntimeError(
                    "Cannot use a constraint function on a sparse variable.")
            return self._resource_apply_sparse_duplicate_indices(
                grad.values, var, grad.indices)
        dense_update = self._resource_apply_dense(grad, var)
        if var.constraint is None:
            return dense_update
        # Apply the constraint only after the dense update has run.
        with ops.control_dependencies([dense_update]):
            return var.assign(var.constraint(var))

    update_ops = []
    with ops.name_scope(name, self._name) as name:
        for grad, var in grads_and_vars:
            if ops.executing_eagerly_outside_functions():
                scope_suffix = ""
            else:
                scope_suffix = "_" + var.op.name
            with ops.name_scope("update" + scope_suffix):
                update_ops.append(update_grad_to_var(grad, var))
        # control dependencies does not work in per replica mode, please change
        # this once b/118841692 is fixed.
        # with ops.control_dependencies(update_ops):
        #   apply_updates = self._iterations.assign_add(1).op
        return merge_update_step(update_ops, self.iterations)
def _real_mirrored_creator(devices, *args, **kwargs):  # pylint: disable=g-missing-docstring
    # Creates one variable per entry of `devices` through `next_creator` and
    # returns a dict mapping each device to its variable. Every replica after
    # the first is renamed after, and initialized from, the first one.
    by_device = {}
    for replica_id, device in enumerate(devices):
        with ops.init_scope(), ops.device(device):
            if replica_id > 0:
                # Derive a distinct name from the first replica's name. The
                # trailing "/" makes the name absolute, so the current name
                # scope is ignored.
                base_name = by_device[devices[0]].name.split(":")[0]
                kwargs["name"] = "%s/replica_%d/" % (base_name, replica_id)

                # Start this replica from the same value as the first one.
                def initial_value_fn(device=device):
                    if context.executing_eagerly():
                        return array_ops.identity(
                            by_device[devices[0]].value())
                    with ops.device(device):
                        return array_ops.identity(
                            by_device[devices[0]].initial_value)
                kwargs["initial_value"] = initial_value_fn
            # `stop_recording` keeps operations done during variable creation
            # (e.g. other variable reads) off the tape.
            with context.context().device_policy(
                    context.DEVICE_PLACEMENT_SILENT), tape.stop_recording():
                created = next_creator(*args, **kwargs)
            assert not isinstance(created, values.DistributedVariable)
            by_device[device] = created
    return by_device
def value_tensors(self):
    """Build a value `Tensor` for each attribute in `self.object_proto`.

    Per the original docstring, this does not require that the Python object
    has been created and is used for restore-on-create when executing eagerly.

    Returns:
        A dictionary mapping from object attribute names to `Tensor`s.
    """
    tensors_by_name = {}
    checkpoint = self._checkpoint
    for attribute in self.object_proto.attributes:
        key = attribute.checkpoint_key
        restore_dtype = checkpoint.dtype_map[key].base_dtype
        with ops.init_scope():
            # The restore op itself is pinned to the CPU.
            with ops.device("/cpu:0"):
                restored, = io_ops.restore_v2(
                    prefix=checkpoint.save_path,
                    tensor_names=[key],
                    shape_and_slices=[""],
                    dtypes=[restore_dtype],
                    name="%s_checkpoint_read" % (attribute.name,))
            # Outside the CPU scope: copies to the current device if needed.
            tensors_by_name[attribute.name] = array_ops.identity(restored)
    return tensors_by_name
def _capture_by_value(
        self,
        op_type,
        inputs,
        dtypes,  # pylint: disable=redefined-outer-name
        input_types=None,
        name=None,
        attrs=None,
        op_def=None,
        compute_shapes=True,
        compute_device=True):
    """Run `op_type` under `ops.init_scope()` and capture its first output.

    Inputs that appear as values in `self.captures` are replaced by their
    corresponding keys before the op is executed or created.

    Returns:
        The `.op` of the tensor returned by `self.capture(...)`.
    """
    # When capturing by value, do the read outside
    inverted_captures = {
        inner: outer for outer, inner in self.captures.items()}
    outer_inputs = [inverted_captures.get(t, t) for t in inputs]
    with ops.init_scope():
        if not context.executing_eagerly():
            created_op = ops.get_default_graph().create_op(
                op_type, outer_inputs, dtypes, input_types, name, attrs,
                op_def, compute_shapes, compute_device)
            value = created_op.outputs[0]
        else:
            attr_list = ("dtype", int(attrs["dtype"].type))
            value, = execute.execute(
                compat.as_bytes(op_type), 1, outer_inputs, attr_list,
                context.context())
    return self.capture(value).op
def _default_getter(name, shape, dtype, initializer=None,
                    partition_info=None, **kwargs):
    """A pared-down version of get_variable which does not reuse variables.

    Args:
        name: variable name, forwarded to `ResourceVariable`.
        shape: variable shape; must be None when `initializer` is a constant.
        dtype: anything accepted by `dtypes.as_dtype`.
        initializer: a constant value, an initializer instance or class, or
            None to use the default variable store's default initializer.
        partition_info: forwarded to the initializer call.
        **kwargs: forwarded to `ResourceVariable`.

    Returns:
        A new `resource_variable_ops.ResourceVariable`.

    Raises:
        ValueError: if `initializer` is a constant and `shape` is not None.
    """
    dtype = dtypes.as_dtype(dtype)
    shape_object = tensor_shape.as_shape(shape)
    with ops.init_scope():
        if initializer is not None:
            initializing_from_value = not callable(initializer)
        else:
            default_store = variable_scope._get_default_variable_store()  # pylint: disable=protected-access
            initializer, initializing_from_value = (
                default_store._get_default_initializer(  # pylint: disable=protected-access
                    name=name, shape=shape_object, dtype=dtype))
        # Same logic as get_variable
        variable_dtype = dtype.base_dtype
        if not initializing_from_value:
            # Instantiate initializer if provided initializer is a type object.
            if isinstance(initializer, type(init_ops.Initializer)):
                initializer = initializer(dtype=dtype)
            # Deferred: the initializer runs only when this is called.
            initial_value = lambda: initializer(  # pylint: disable=unnecessary-lambda
                shape_object.as_list(), dtype=dtype,
                partition_info=partition_info)
        else:
            if shape is not None:
                raise ValueError("If initializer is a constant, do not specify shape.")
            initial_value = initializer
        return resource_variable_ops.ResourceVariable(
            initial_value=initial_value,
            name=name,
            dtype=variable_dtype,
            **kwargs)
def load_function_def_library(library):
    """Load a set of functions as concrete functions without captured inputs.

    Function definitions are visited in the order given by
    `_sort_function_defs`. Each is rewritten by `_fix_fdef`, converted to a
    graph, given its dependencies, and wrapped in a `ConcreteFunction`. Per
    the original docstring, names are manipulated during load so they do not
    overlap with previously created ones.

    Args:
        library: FunctionDefLibrary proto message.

    Returns:
        Map of original function names in the library to instances of
        `ConcreteFunction` without captured inputs.

    Raises:
        ValueError: if functions dependencies have a cycle (per the original
            contract; presumably raised by `_sort_function_defs` - confirm).
    """
    loaded = {}
    for fdef in _sort_function_defs(library):
        fixed_fdef = _fix_fdef(fdef, loaded)
        graph = function_def_lib.function_def_to_graph(fixed_fdef)
        # Dependencies were loaded on earlier iterations; add each to this
        # function's graph.
        for dependency in _list_function_deps(fdef):
            loaded[dependency].add_to_graph(graph)

        concrete = function_lib.ConcreteFunction(graph)
        concrete.add_to_graph()
        loaded[fdef.signature.name] = concrete

        # Also register the gradients in the current root context.
        with ops.init_scope():
            concrete._register_gradient()  # pylint: disable=protected-access
    return loaded
def initialize_variables():
    # For each (variable, initializer) entry in the enclosing
    # `initializer_map`, assign the initializer lifted into the current
    # default graph, skipping variables whose is-initialized check is truthy.
    for variable, init_tensor in initializer_map.items():
        with ops.init_scope():
            is_initialized = resource_variable_ops.var_is_initialized_op(
                variable.handle)
            if is_initialized:
                # Ignore variables which are already initialized at trace time.
                continue
        lifted = lift_to_graph.lift_to_graph(
            [init_tensor], ops.get_default_graph())
        variable.assign(lifted[init_tensor])
def __init__(self, path):
    """Record the full path to the asset.

    Args:
        path: value converted to a string tensor and stored on `self._path`.
    """
    # The conversion runs under init_scope so that functions do not capture
    # `path` in an initialization graph: it is transient and should not end
    # up in a serialized function body.
    with ops.init_scope():
        with ops.device("CPU"):
            path_tensor = ops.internal_convert_to_tensor(
                path, dtype=dtypes.string, name="asset_path")
            self._path = path_tensor
def _get_beta_accumulators(self):
    """Return the ("beta1_power", "beta2_power") non-slot variables.

    The lookup runs under `ops.init_scope()`. The graph passed to
    `_get_non_slot_variable` is None when executing eagerly there, otherwise
    the default graph.
    """
    with ops.init_scope():
        graph = None if context.executing_eagerly() else ops.get_default_graph()
        beta1_power = self._get_non_slot_variable("beta1_power", graph=graph)
        beta2_power = self._get_non_slot_variable("beta2_power", graph=graph)
        return (beta1_power, beta2_power)
def create_file_writer_v2(logdir,
                          max_queue=None,
                          flush_millis=None,
                          filename_suffix=None,
                          name=None):
    """Creates a summary file writer for the given log directory.

    Args:
        logdir: a string specifying the directory in which to write an event
            file.
        max_queue: the largest number of summaries to keep in a queue; will
            flush once the queue gets bigger than this. Defaults to 10.
        flush_millis: the largest interval between flushes. Defaults to
            120,000.
        filename_suffix: optional suffix for the event file name. Defaults to
            `.v2`. A ".<pid>.<uid>" prefix is always prepended to it.
        name: a name for the op that creates the writer.

    Returns:
        A SummaryWriter object.

    Raises:
        ValueError: if `logdir` is None.
    """
    if logdir is None:
        raise ValueError("logdir cannot be None")
    # Recorded before entering init_scope, inside which it could differ.
    inside_function = ops.inside_function()
    with ops.name_scope(name, "create_file_writer") as scope, ops.device("cpu:0"):
        # Run init inside an init_scope() to hoist it out of tf.functions.
        with ops.init_scope():
            if context.executing_eagerly():
                _check_create_file_writer_args(
                    inside_function,
                    logdir=logdir,
                    max_queue=max_queue,
                    flush_millis=flush_millis,
                    filename_suffix=filename_suffix)
            logdir = ops.convert_to_tensor(logdir, dtype=dtypes.string)
            if max_queue is None:
                max_queue = constant_op.constant(10)
            if flush_millis is None:
                flush_millis = constant_op.constant(2 * 60 * 1000)
            if filename_suffix is None:
                filename_suffix = constant_op.constant(".v2")
            # Prepend the PID and a process-local UID to the filename suffix to
            # avoid filename collisions within the machine (the filename
            # already contains the hostname to avoid cross-machine collisions).
            unique_prefix = constant_op.constant(
                ".%s.%s" % (os.getpid(), ops.uid()))
            filename_suffix = unique_prefix + filename_suffix

        # Use a unique shared_name to prevent resource sharing.
        shared_name = (
            context.shared_name() if context.executing_eagerly()
            else ops.name_from_scope_name(scope))  # pylint: disable=protected-access
        init_op_fn = functools.partial(
            gen_summary_ops.create_summary_file_writer,
            logdir=logdir,
            max_queue=max_queue,
            flush_millis=flush_millis,
            filename_suffix=filename_suffix)
        return ResourceSummaryWriter(
            shared_name=shared_name,
            init_op_fn=init_op_fn,
            name=name,
            v2=True)
def __init__(self, dist, coord, replica_id, device_map, variable_creator_fn, fn, args, kwargs):
    """Store the call to run and snapshot the creating thread's context.

    Everything below is read from the thread that constructs this object
    (default graph, eager flag, variable scope, name scope, device policy) and
    stored on the instance.

    Args:
        dist: stored as `self.distribution`.
        coord: stored as `self.coord`.
        replica_id: integer replica index; when > 0 a "replica_<id>/" suffix
            is appended to the captured name scope.
        device_map: stored as `self.device_map`.
        variable_creator_fn: stored as `self.variable_creator_fn`.
        fn: stored as `self.main_fn`.
        args: stored as `self.main_args`.
        kwargs: stored as `self.main_kwargs`.
    """
    super(_MirroredReplicaThread, self).__init__()
    self.coord = coord
    self.distribution = dist
    self.device_map = device_map
    self.replica_id = replica_id
    self.variable_creator_fn = variable_creator_fn
    # State needed to run and return the results of `fn`.
    self.main_fn = fn
    self.main_args = args
    self.main_kwargs = kwargs
    self.main_result = None
    self.done = False
    # State needed to run the next merge_call() (if any) requested via
    # ReplicaContext.
    self.merge_fn = None
    self.merge_args = None
    self.merge_kwargs = None
    self.merge_result = None
    self.captured_name_scope = None
    self.captured_var_scope = None
    # We use a threading.Event for the main thread to signal when this
    # thread should start running (`should_run`), and another for
    # this thread to transfer control back to the main thread
    # (`has_paused`, either when it gets to a
    # `get_replica_context().merge_call` or when `fn` returns). In
    # either case the event starts cleared, is signaled by calling
    # set(). The receiving thread waits for the signal by calling
    # wait() and then immediately clearing the event using clear().
    self.should_run = threading.Event()
    self.has_paused = threading.Event()
    # These fields have to do with inheriting various contexts from the
    # parent thread:
    ctx = context.context()
    self.in_eager = ctx.executing_eagerly()
    self.record_thread_local_context_fields()
    # pylint: disable=protected-access
    # Make sure the context handle exists before querying its device policy.
    if not ctx._context_handle:
        ctx._initialize_handle_and_devices()
    self.context_device_policy = (
        pywrap_tensorflow.TFE_ContextGetDevicePlacementPolicy(
            ctx._context_handle))
    self.graph = ops.get_default_graph()
    # Also record the eager flag and default graph as seen from init_scope,
    # which may differ from the values captured above.
    with ops.init_scope():
        self._init_in_eager = context.executing_eagerly()
        self._init_graph = ops.get_default_graph()

    # Shallow copy, so this thread holds its own list object.
    self._variable_creator_stack = self.graph._variable_creator_stack[:]
    self._var_scope = variable_scope.get_variable_scope()
    # Adding a "/" at end lets us re-enter this scope later.
    self._name_scope = self.graph.get_name_scope()
    if self._name_scope:
        self._name_scope += "/"
    if self.replica_id > 0:
        # Normalize a falsy scope to "" before appending the replica suffix.
        if not self._name_scope:
            self._name_scope = ""
        self._name_scope += "replica_%d/" % self.replica_id
def add_var(x):
    # On the first call (empty `v_holder`), create two variables and stash
    # them; the second one is assigned 10. under init_scope. Every call
    # returns the sum of both variables and `x`.
    if not v_holder:
        v_holder.append(variables.Variable([1., 2.]))
        preset = variables.Variable(3.)
        with ops.init_scope():
            preset.assign(10.)
        v_holder.append(preset)
    first, second = v_holder[0], v_holder[1]
    return first + second + x
def restore(self, checkpointable):
    """Restore this value into `checkpointable`.

    Only acts when `self.bind_object(checkpointable)` is truthy, i.e. (per
    the original comment) the correspondence with a checkpointed object is
    new. In that case the deferred restorations for the object and its
    dependencies are processed, and any resulting ops are appended to
    `self._checkpoint.restore_ops`.
    """
    with ops.init_scope():
        if not self.bind_object(checkpointable):
            return
        pending_ops = checkpointable._restore_from_checkpoint_position(self)  # pylint: disable=protected-access
        if pending_ops:
            self._checkpoint.restore_ops.extend(pending_ops)
def init_fn():
    # Checks that execution is eager both outside and inside init_scope, and
    # that inside it the context-switch stack holds exactly one entry: a
    # non-function-building switch entered through `context.eager_mode`.
    self.assertTrue(context.executing_eagerly())
    with ops.init_scope():
        self.assertTrue(context.executing_eagerly())
        switch_stack = context.context().context_switches.stack
        self.assertEqual(len(switch_stack), 1)
        only_switch = switch_stack[0]
        self.assertFalse(only_switch.is_building_function)
        self.assertEqual(only_switch.enter_context_fn, context.eager_mode)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Run two inner optimizer steps and blend their results.

    Sequence, chained with control dependencies:
      1. back up the current weights (and optimizer variables) into tmp vars;
      2. step `self.optimizer`, store the result in the xt vars;
      3. reset the weights to the `zt` slots and step `self.optimizer2`;
      4. compute a per-variable step size
         `St1 = min(1, |z_{t+1} - z_t| / |z_{t+1} - x_{t+1}|)`, store z_{t+1}
         in the `zt` slots and set the weights to
         `x_{t+1} + St1 * (z_{t+1} - x_{t+1})`.

    Args:
        grads_and_vars: iterable of (gradient, variable) pairs.
        global_step: forwarded to both inner `apply_gradients` calls.
        name: forwarded to both inner `apply_gradients` calls.

    Returns:
        A `tf.no_op()` that depends on the whole sequence.
    """
    # Materialize once: the pairs are traversed several times and copied.
    grads_and_vars = list(grads_and_vars)
    var_list = [v for _, v in grads_and_vars]

    with ops.init_scope():
        for v in var_list:
            self._get_or_make_slot(v, v, "zt", self._name)
        # The loop variable is deliberately NOT called `name`: the original
        # overwrote the `name` argument here, so the inner optimizers were
        # given the last slot name instead of the caller's name.
        for _slot_name in self.optimizer.get_slot_names():
            for var in self.optimizer.variables():
                self._get_or_make_slot(var, var, "zt", "zt")
    self._prepare()

    def _name(post, s):
        # "scope/w:0" -> "scope/w_<post>_dontsave"
        ss = s.split(":")
        return ss[0] + "_" + post + "_dontsave"

    zt = [self.get_slot(v, "zt") for v in var_list]
    xt = [tf.Variable(v, name=_name("gigaxt", v.name)) for v in var_list]
    tmp = [tf.Variable(v, name=_name("gigatmp", v.name)) for v in var_list]

    xslots_list = []
    zslots_list = []
    tmpslots_list = []
    slots_vars = []
    # NOTE(review): as in the original, every optimizer variable is visited
    # once per slot name, so entries repeat when there are several slot names.
    for _slot_name in self.optimizer.get_slot_names():
        for var in self.optimizer.variables():
            slots_vars.append(var)
            xslots_list.append(tf.Variable(var))
            zslots_list.append(self._get_or_make_slot(var, var, "zt", "zt"))
            tmpslots_list.append(
                tf.Variable(var, name=_name("gigaslottmp", var.name)))

    restored_vars = var_list + slots_vars
    zt_vars = zt + zslots_list
    xt_vars = xt + xslots_list
    tmp_vars = tmp + tmpslots_list

    graph = tf.get_default_graph()
    # store variables for resetting
    op1 = tf.group(*[tf.assign(w, v) for w, v in zip(tmp_vars, restored_vars)])  # store tmp_vars
    with graph.control_dependencies([op1]):
        # NOTE(review): `global_step` is forwarded to both inner optimizers.
        op2 = self.optimizer.apply_gradients(
            list(grads_and_vars), global_step=global_step, name=name)
        with graph.control_dependencies([op2]):
            op3 = tf.group(*[tf.assign(w, v) for w, v in zip(xt_vars, restored_vars)])  # store xt^+1 in xt_vars
            with graph.control_dependencies([op3]):
                op4 = tf.group(*[tf.assign(w, v) for w, v in zip(restored_vars, zt_vars)])  # restore vars to zt (different weights)
                with graph.control_dependencies([op4]):
                    op5 = self.optimizer2.apply_gradients(
                        list(grads_and_vars), global_step=global_step, name=name)  # zt+1
                    with graph.control_dependencies([op5]):
                        zt1_xt1 = [
                            _restored_vars - _xt1_vars
                            for _restored_vars, _xt1_vars in zip(restored_vars, xt_vars)]
                        St1 = [
                            tf.minimum(1.0, tf.norm(_zt1_vars - _zt_vars) / tf.norm(_zt1_xt1))
                            for _zt1_vars, _zt_vars, _zt1_xt1 in zip(restored_vars, zt_vars, zt1_xt1)]
                        self.gan.add_metric('st1', tf.reduce_mean(tf.add_n(St1) / len(St1)))
                        #self.gan.add_metric('xzt1',tf.norm(xt_vars[0]-zt_vars[0]))
                        nextw = [
                            _xt_t1 + _St1 * _zt1_xt1
                            for _xt_t1, _St1, _zt1_xt1 in zip(xt_vars, St1, zt1_xt1)]
                        op6 = tf.group(*[tf.assign(w, v) for w, v in zip(zt_vars, restored_vars)])  # set zt+1
                        with graph.control_dependencies([op6]):
                            op7 = tf.group(*[tf.assign(w, v) for w, v in zip(restored_vars, nextw)])  # set xt+1
                            with graph.control_dependencies([op7]):
                                return tf.no_op()
def create_variable():
    # Draws a random int64 (2, 2) value under init_scope on every call. On the
    # first call (empty `a`) a CPU ResourceVariable is created from it and
    # cached in `a`. Always returns the cached variable's current value.
    with ops.init_scope():
        random_init = random_ops.random_uniform(
            (2, 2), maxval=1000000, dtype=dtypes.int64)

    if not a:
        with ops.device("CPU:0"):
            cached = resource_variable_ops.ResourceVariable(random_init)
            a.append(cached)

    return a[0].read_value()
def _get_unique_name(name):
    """Returns `name` made unique in the default graph seen from init_scope.

    Args:
        name: String to uniquify.

    Returns:
        A string.
    """
    with ops.init_scope():
        outer_graph = ops.get_default_graph()
        return outer_graph.unique_name(name)
def load(self, tags):
    """Creates an object from the MetaGraph identified by `tags`.

    Args:
        tags: passed to `self.get_meta_graph_def_from_tags` to select the
            MetaGraph to load.

    Returns:
        A `tracking.AutoTrackable` with these attributes set: `initializer`,
        `asset_paths`, `signatures`, `variables`, `tensorflow_version`,
        `tensorflow_git_version`, `graph` and `prune`.
    """
    meta_graph_def = self.get_meta_graph_def_from_tags(tags)
    # One-element list used as an out-parameter: `load_graph` is expected to
    # place the saver in it (unpacked below).
    load_graph_returns = [None]
    wrapped = wrap_function.wrap_function(
        functools.partial(self.load_graph, load_graph_returns, meta_graph_def),
        signature=[])
    saver, = load_graph_returns
    self.restore_variables(wrapped, saver)
    with wrapped.graph.as_default():
        # Fall back to the default local init op when the MetaGraph has none.
        init_op = loader_impl.get_init_op(
            meta_graph_def) or monitored_session.Scaffold.default_local_init_op()
        # Add a dummy Tensor we know we can fetch to add control dependencies to.
        init_anchor = constant_op.constant(0., name="dummy_fetch")

    root = tracking.AutoTrackable()
    # Parallel lists: the graph element fed for each asset, and the asset.
    asset_feed_tensors = []
    asset_paths = []
    for tensor_name, value in loader_impl.get_asset_tensors(
        self._export_dir, meta_graph_def).items():
        asset_feed_tensors.append(wrapped.graph.as_graph_element(tensor_name))
        asset_paths.append(tracking.TrackableAsset(value))
    # Prune the wrapped graph to a function that takes the asset tensors as
    # feeds and fetches the init op together with the anchor tensor.
    init_fn = wrapped.prune(
        feeds=asset_feed_tensors,
        fetches=[init_anchor, wrapped.graph.as_graph_element(init_op)])
    initializer = _Initializer(init_fn, asset_paths)
    # pylint: disable=protected-access
    local_init_op, _ = initializer._initialize()
    # pylint: enable=protected-access
    with ops.init_scope():
        # Only when the init-scope context is a graph: register the init op
        # and attach it to every local variable of the wrapped graph.
        if not context.executing_eagerly():
            ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, local_init_op)
            for variable in wrapped.graph.get_collection_ref(
                ops.GraphKeys.LOCAL_VARIABLES):
                # pylint: disable=protected-access
                variable._initializer_op = local_init_op
                # pylint: enable=protected-access
    root.initializer = initializer
    root.asset_paths = asset_paths
    signature_functions = self._extract_signatures(wrapped, meta_graph_def)

    root.signatures = signature_serialization.create_signature_map(
        signature_functions)
    root.variables = list(wrapped.graph.variables)
    root.tensorflow_version = (
        meta_graph_def.meta_info_def.tensorflow_version)
    root.tensorflow_git_version = (
        meta_graph_def.meta_info_def.tensorflow_git_version)
    root.graph = wrapped.graph
    root.prune = wrapped.prune
    return root
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Step the inner optimizer, then extrapolate: w <- w_new + (w_new - w_old).

    The current weights (and, when `self.config.include_slots` is set, the
    inner optimizer's variables) are saved, `self.optimizer` takes one step,
    and each tracked variable is then set to `new + (new - old)`.

    Args:
        grads_and_vars: iterable of (gradient, variable) pairs; every variable
            must be in `self.gan.d_vars()` or `self.gan.g_vars()`.
        global_step: forwarded to `self.optimizer.apply_gradients`.
        name: forwarded to `self.optimizer.apply_gradients`.

    Returns:
        A `tf.no_op()` that depends on the whole sequence.

    Raises:
        ValueError: if a variable is in neither `d_vars()` nor `g_vars()`.
    """
    # Materialize once: the pairs are traversed several times and copied.
    grads_and_vars = list(grads_and_vars)
    var_list = [v for _, v in grads_and_vars]

    for var in var_list:
        if var not in self.gan.d_vars() and var not in self.gan.g_vars():
            # The original `raise("...")` raised a plain string, which is a
            # TypeError in Python 3 and hid this message.
            raise ValueError("Couldn't find var in g_vars or d_vars")

    with ops.init_scope():
        for v in var_list:
            self._zeros_slot(v, "v1", self._name)
        if self.config.include_slots:
            # The loop variable is deliberately NOT called `name`: the
            # original overwrote the `name` argument here, so the inner
            # optimizer was given the last slot name instead.
            for _slot_name in self.optimizer.get_slot_names():
                for var in self.optimizer.variables():
                    self._zeros_slot(var, "pm", "pm")
    self._prepare()

    v1 = [self.get_slot(v, "v1") for v in var_list]
    slots_list = []
    slots_vars = []
    if self.config.include_slots:
        # NOTE(review): as in the original, every optimizer variable is
        # visited once per slot name, so entries can repeat.
        for _slot_name in self.optimizer.get_slot_names():
            for var in self.optimizer.variables():
                slots_vars.append(var)
                slots_list.append(self._zeros_slot(var, "pm", "pm"))

    current_vars = var_list + slots_vars
    tmp_vars = v1 + slots_list

    graph = tf.get_default_graph()
    op1 = tf.group(*[tf.assign(w, v) for w, v in zip(tmp_vars, current_vars)])  # store variables

    with graph.control_dependencies([op1]):
        # store g2
        op3 = self.optimizer.apply_gradients(
            list(grads_and_vars), global_step=global_step, name=name)
        with graph.control_dependencies([op3]):
            def pmcombine(_v1, _v2):
                # Extrapolate past the new value by the size of the step taken.
                return _v2 + (_v2 - _v1)

            combined = [
                pmcombine(_v1, _v2) for _v1, _v2 in zip(tmp_vars, current_vars)]
            # restore v1, slots
            op5 = tf.group(*[tf.assign(w, v) for w, v in zip(current_vars, combined)])
            with graph.control_dependencies([op5]):
                return tf.no_op()
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Unroll `self._depth` inner optimizer steps, averaging into "depth" slots.

    The tracked variables are first copied into their "depth" slots. Each
    unrolled step then applies the inner optimizer, blends
    `decay * variable + (1 - decay) * slot` into the slot, and recomputes the
    gradients from `self.gan.loss.sample`. After the last step the slot
    values are written back to the variables.

    Args:
        grads_and_vars: iterable of (gradient, variable) pairs; every variable
            must be in `self.gan.d_vars()` or `self.gan.g_vars()`.
        global_step: forwarded to every inner `apply_gradients` call.
        name: forwarded to every inner `apply_gradients` call.

    Returns:
        A `tf.no_op()` that depends on the whole unrolled sequence.

    Raises:
        Exception: if a variable is in neither `d_vars()` nor `g_vars()`.
    """
    # Materialize once: the pairs are traversed more than once below.
    grads_and_vars = list(grads_and_vars)

    d_vars = []
    g_vars = []
    for _, var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars.append(var)
        elif var in self.gan.g_vars():
            g_vars.append(var)
        else:
            raise Exception("Couldn't find var in g_vars or d_vars")

    if self.config.apply_on == "discriminator":
        # NOTE(review): `depth_vars` aliases `d_vars` here, so the in-place
        # appends further down also grow `d_vars`. Kept exactly as in the
        # original - confirm whether this is intended.
        depth_vars = d_vars
    else:
        depth_vars = d_vars + g_vars

    with ops.init_scope():
        for v in depth_vars:
            self._get_or_make_slot(v, v, "depth", self.name)
        self.optimizer._create_slots([v for _, v in grads_and_vars])
        # The loop variable is deliberately NOT called `name`: the original
        # overwrote the `name` argument here, and `calculate_depth` below
        # then passed the last slot name to the inner optimizer.
        for _slot_name in self.optimizer.get_slot_names():
            for var in self.optimizer.variables():
                self._zeros_slot(var, "depth", self.name)
    self._prepare()

    depth_slots = [self.get_slot(v, "depth") for v in depth_vars]
    for _slot_name in self.optimizer.get_slot_names():
        for var in self.optimizer.variables():
            depth_vars.append(var)  # in place, see the aliasing note above
            depth_slots.append(self._zeros_slot(var, "depth", self.name))

    graph = tf.get_default_graph()

    def calculate_depth(grads_and_vars_k, k=0):
        # Base case: copy the slot values back into the variables.
        if k == 0:
            return tf.group(*[
                tf.assign(v, nv) for v, nv in zip(depth_vars, depth_slots)])

        op2 = self.optimizer.apply_gradients(
            grads_and_vars_k, global_step=global_step, name=name)
        with graph.control_dependencies([op2]):
            w_k_combined = [
                self._decay * w_k_1 + (1. - self._decay) * w_hat
                for w_hat, w_k_1 in zip(depth_slots, depth_vars)]
            op3 = tf.group(*[tf.assign(w, v) for w, v in zip(depth_slots, w_k_combined)])  # store variables
            with graph.control_dependencies([op3]):
                d_loss, g_loss = self.gan.loss.sample
                d_grads = tf.gradients(d_loss, d_vars)
                g_grads = tf.gradients(g_loss, g_vars)
                grads_k_1 = d_grads + g_grads
                # `zip` stops at the shorter of the two sequences.
                grads_and_vars_k_1 = list(zip(grads_k_1, depth_vars))
                return calculate_depth(grads_and_vars_k_1, k - 1)

    op1 = tf.group(*[tf.assign(w, v) for w, v in zip(depth_slots, depth_vars)])  # store variables
    with graph.control_dependencies([op1]):
        opd = calculate_depth(grads_and_vars, self._depth)
        with graph.control_dependencies([opd]):
            return tf.no_op()
def _get_tensor_from_node(self, node_id):
    """Resolves a node id into a tensor to be captured for a function.

    Args:
        node_id: index or key into `self._nodes`.

    Returns:
        The handle of a resource variable, the `asset_path` of a
        `TrackableAsset`, the object itself if it is a tensor, or the
        `resource_handle` of a `CapturableResource`.

    Raises:
        ValueError: if the node is none of the above.
    """
    with ops.init_scope():
        node = self._nodes[node_id]
        if resource_variable_ops.is_resource_variable(node):
            return node.handle
        if isinstance(node, tracking.TrackableAsset):
            return node.asset_path
        if tensor_util.is_tensor(node):
            return node
        if isinstance(node, tracking.CapturableResource):
            # Note: this executes restored functions in the CapturableResource.
            return node.resource_handle
        raise ValueError("Can't convert node %s to tensor" % (type(node)))
def _initialize_uninitialized_variables(self, initializer_map):
    """Make and call a `ConcreteFunction` which initializes variables.

    Args:
        initializer_map: mapping from variable to its initializer tensor.

    Returns:
        The result of calling the traced concrete function.
    """

    # Note: using defun here avoids an infinite recursion.
    @function_lib.defun
    def initialize_variables():
        for variable, init_tensor in initializer_map.items():
            with ops.init_scope():
                is_initialized = resource_variable_ops.var_is_initialized_op(
                    variable.handle)
                if is_initialized:
                    # Ignore variables which are already initialized at trace
                    # time.
                    continue
            # Lift the initializer into the default graph, then assign it.
            lifted = lift_to_graph.lift_to_graph(
                init_tensor, ops.get_default_graph())
            variable.assign(lifted[init_tensor])

    with ops.init_scope():
        concrete_fn = initialize_variables.get_concrete_function()
        return concrete_fn()
def _init_from_args(self, name, shared_name):  # pylint: disable=invalid-name
    """Initialize the CriticalSection from constructor arguments.

    Args:
        name: name scope for the mutex op; defaults to "CriticalSection".
        shared_name: shared name for the mutex resource; when None, the
            resolved scope name is used.
    """
    with ops.name_scope(name, "CriticalSection", []) as name:
        with ops.init_scope():
            # pylint: disable=protected-access
            init_container = ops.get_default_graph()._container
            # pylint: enable=protected-access
        resolved_shared_name = name if shared_name is None else shared_name
        resolved_container = "" if init_container is None else init_container
        self._handle = gen_resource_variable_ops.mutex_v2(
            shared_name=resolved_shared_name,
            container=resolved_container,
            name=name)

    # Register in the graph collection only when building a graph.
    if context.executing_eagerly():
        return
    ops.add_to_collections(CRITICAL_SECTIONS, self)
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Correct gradients with cross-player terms, then apply them.

    With `self.config.finite_differences` set, the work is delegated to
    `self.finite_differences`. Otherwise:
      * gamma12 = d/d(d_vars) of sum(|g_grads|^2), zero for generator vars;
      * gamma21 = d/d(g_vars) of sum(|d_grads|^2), zero for discriminator vars;
      * new gradient = grad - 0.5*alpha*gamma21 + 0.5*alpha*gamma12.
    The corrected gradients are passed to the wrapped `self.optimizer`, and
    the metrics 'gamma12' and 'gamma21' are recorded on `self.gan`.

    Args:
        grads_and_vars: iterable of (gradient, variable) pairs; every variable
            must be in `self.gan.d_vars()` or `self.gan.g_vars()`.
        global_step: forwarded to the inner optimizer.
        name: forwarded to the inner optimizer.

    Returns:
        Whatever `self.optimizer.apply_gradients` (or
        `self.finite_differences`) returns.

    Raises:
        ValueError: if a variable is in neither `d_vars()` nor `g_vars()`.
    """
    # Materialize once: the pairs are traversed several times below.
    grads_and_vars = list(grads_and_vars)
    all_vars = [v for _, v in grads_and_vars]
    all_grads = [g for g, _ in grads_and_vars]

    d_vars = []
    g_vars = []
    for _, var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars.append(var)
        elif var in self.gan.g_vars():
            g_vars.append(var)
        else:
            # The original `raise("...")` raised a plain string, which is a
            # TypeError in Python 3 and hid this message.
            raise ValueError("Couldn't find var in g_vars or d_vars")

    with ops.init_scope():
        self.optimizer._create_slots(list(all_vars))

    self._prepare()

    # NOTE(review): these slices assume all discriminator pairs come first in
    # `grads_and_vars` - confirm against callers.
    d_grads = all_grads[:len(d_vars)]
    g_grads = all_grads[len(d_vars):]
    if self.config.finite_differences:
        return self.finite_differences(
            grads_and_vars, global_step, name, d_vars, g_vars, d_grads, g_grads)

    dc_grads = sum([tf.reduce_sum(tf.square(d)) for d in d_grads])
    gc_grads = sum([tf.reduce_sum(tf.square(g)) for g in g_grads])
    gamma12 = tf.gradients(gc_grads, d_vars) + [tf.zeros_like(g) for g in g_vars]
    gamma21 = [tf.zeros_like(d) for d in d_vars] + tf.gradients(dc_grads, g_vars)
    # `tf.gradients` yields None where there is no dependency; use zeros.
    gamma12 = [
        tf.zeros_like(ddg) if _dg is None else _dg
        for ddg, _dg in zip(all_vars, gamma12)]
    gamma21 = [
        tf.zeros_like(ddg) if _dg is None else _dg
        for ddg, _dg in zip(all_vars, gamma21)]

    gamma12_sums = [tf.reduce_sum(_gamma12) for _gamma12 in gamma12]
    gamma21_sums = [tf.reduce_sum(_gamma21) for _gamma21 in gamma21]
    gamma12_metric = self.gan.ops.squash(sum(gamma12_sums))
    self.gan.add_metric('gamma12', gamma12_metric)
    gamma21_metric = self.gan.ops.squash(sum(gamma21_sums))
    self.gan.add_metric('gamma21', gamma21_metric)

    new_grads = [
        _grads - 0.5 * self._alpha * _gamma21 + 0.5 * self._alpha * _gamma12
        for _gamma12, _gamma21, _grads in zip(gamma12, gamma21, all_grads)]

    new_grads_and_vars = list(zip(new_grads, all_vars))
    return self.optimizer.apply_gradients(
        new_grads_and_vars, global_step=global_step, name=name)
def _initialize_uninitialized_variables(self, initializer_map):
    """Make and call a `ConcreteFunction` which initializes variables.

    Args:
        initializer_map: mapping from variable to its initializer tensor.

    Returns:
        The result of calling the traced concrete function.
    """

    # Note: using defun here avoids an infinite recursion.
    # Note: there is no reason not to autograph once the overhead is negligible.
    @function_lib.defun(autograph=False)  # tf.function internal, pure graph
    def initialize_variables():
        for variable, init_tensor in initializer_map.items():
            with ops.init_scope():
                is_initialized = resource_variable_ops.var_is_initialized_op(
                    variable.handle)
                if is_initialized:
                    # Ignore variables which are already initialized at trace
                    # time.
                    continue
            # Lift the initializer into the default graph, then assign it.
            lifted = lift_to_graph.lift_to_graph(
                [init_tensor], ops.get_default_graph())
            variable.assign(lifted[init_tensor])

    with ops.init_scope():
        concrete_fn = initialize_variables.get_concrete_function()
        return concrete_fn()
def make_variable(name,
                  shape=None,
                  dtype=dtypes.float32,
                  initializer=None,
                  trainable=None,
                  caching_device=None,
                  validate_shape=True,
                  constraint=None,
                  use_resource=None,
                  collections=None,
                  synchronization=tf_variables.VariableSynchronization.AUTO,
                  aggregation=tf_variables.VariableAggregation.NONE,
                  partitioner=None):  # pylint: disable=unused-argument
    """Temporary util to create a variable (relies on `variable_scope.variable`).

    Some reuse-related technicalities prevent us from using
    `variable_scope.get_variable()` directly, so a subcomponent with fewer
    constraints (`variable_scope.variable()`) is used instead.

    In the longer term a similar "default variable creator" method should
    probably exist in `Trackable`; once it does, this temporary solution can
    be removed.

    TODO(fchollet): remove this method when no longer needed.

    Arguments:
      name: Variable name.
      shape: Variable shape.
      dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
      initializer: Initializer instance (callable), an initializer class (it
        is instantiated with no arguments), or a non-callable value to
        initialize from directly.
      trainable: Whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases) or
        "non_trainable_variables" (e.g. BatchNorm mean, stddev). Note, if the
        current variable scope is marked as non-trainable then this parameter
        is ignored and any added variables are also marked as non-trainable.
        `trainable` defaults to `True` unless `synchronization` is set to
        `ON_READ`.
      caching_device: Passed to `tf.Variable`.
      validate_shape: Passed to `tf.Variable`.
      constraint: Constraint instance (callable).
      use_resource: Whether to use a `ResourceVariable`. `None` is treated as
        `True`.
      collections: List of graph collections keys. The new variable is added
        to these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
      synchronization: Indicates when a distributed variable will be
        aggregated. Accepted values are constants defined in the class
        `tf.VariableSynchronization`. By default the synchronization is set
        to `AUTO` and the current `DistributionStrategy` chooses when to
        synchronize. If `synchronization` is set to `ON_READ`, `trainable`
        must not be set to `True`.
      aggregation: Indicates how a distributed variable will be aggregated.
        Accepted values are constants defined in the class
        `tf.VariableAggregation`.
      partitioner: Not handled at this time.

    Returns:
      Variable instance.
    """
    # A non-callable, non-None initializer is a concrete initial value.
    initializing_from_value = (
        initializer is not None and not callable(initializer))

    with ops.init_scope():
        if not initializing_from_value:
            # Instantiate initializer if provided initializer is a type object.
            initializer_class_types = (type(init_ops.Initializer),
                                       type(init_ops_v2.Initializer))
            if isinstance(initializer, initializer_class_types):
                initializer = initializer()
            initial_value = lambda: initializer(shape, dtype=dtype)
            declared_dtype = dtype.base_dtype
        else:
            initial_value = initializer
            declared_dtype = None

    if use_resource is None:
        use_resource = True

    # TODO(apassos,rohanj) figure out how to remove collections from here so we
    # can remove the V1.
    variable_shape = tensor_shape.TensorShape(shape)
    return tf_variables.VariableV1(
        initial_value=initial_value,
        name=name,
        trainable=trainable,
        caching_device=caching_device,
        dtype=declared_dtype,
        validate_shape=validate_shape,
        constraint=constraint,
        use_resource=use_resource,
        collections=collections,
        synchronization=synchronization,
        aggregation=aggregation,
        shape=variable_shape or None)
def func_graph_from_py_func(name,
                            python_func,
                            args,
                            kwargs,
                            signature=None,
                            func_graph=None,
                            experimental_autograph=False,
                            add_control_dependencies=True,
                            arg_names=None,
                            op_return_value=None):
    """Returns a `FuncGraph` generated from `python_func`.

    The function is called once inside `func_graph` (with resource variables
    forced on for the duration of the call) while a fresh tape records the
    variables it watches. The graph's `inputs`, `structured_outputs`,
    `outputs` and `variables` are then populated from the trace.

    Args:
      name: an identifier for the function.
      python_func: the Python function to trace.
      args: the positional args with which the Python function should be
        called; ignored if a signature is provided.
      kwargs: the keyword args with which the Python function should be
        called; ignored if a signature is provided.
      signature: a possibly nested sequence of `TensorSpecs` specifying the
        shapes and dtypes of the arguments. When a signature is provided,
        `args` and `kwargs` are ignored, and `python_func` is traced with
        Tensors conforming to `signature`. If `None`, the shapes and dtypes
        are inferred from the inputs.
      func_graph: Optional. An instance of FuncGraph. If provided, we will use
        this graph else a new one is built and returned.
      experimental_autograph: whether to use autograph to compile
        `python_func`. See https://www.tensorflow.org/guide/autograph for more
        information.
      add_control_dependencies: If True, automatically adds control
        dependencies to ensure program order matches execution order and
        stateful ops always execute.
      arg_names: Optional list of argument names, used to give input
        placeholders recognizable names.
      op_return_value: Optional. A Tensor. If set and `python_func` returns
        Operations, those return values will be replaced with this value. If
        not set, returning an Operation triggers an error.

    Returns:
      A FuncGraph.

    Raises:
      TypeError: If any of `python_func`'s return values is neither `None` nor
        a `Tensor`.
    """
    if op_return_value is not None:
        assert isinstance(op_return_value, ops.Tensor), op_return_value
    if func_graph is None:
        func_graph = FuncGraph(name)
    assert isinstance(func_graph, FuncGraph)
    # Pick the context-manager class; it is instantiated in the `with` below.
    if add_control_dependencies:
        control_manager = AutomaticControlDependencies
    else:
        control_manager = ops.NullContextmanager
    with func_graph.as_default(), control_manager() as a:
        # Force resource variables while tracing; the previous setting is
        # restored in the `finally` clause below.
        current_scope = variable_scope.get_variable_scope()
        default_use_recource = current_scope.use_resource
        current_scope.set_use_resource(True)

        # A signature, when given, replaces the concrete call arguments.
        if signature is not None:
            args = signature
            kwargs = {}

        func_args = _get_defun_inputs_from_args(args, arg_names)
        func_kwargs = _get_defun_inputs_from_kwargs(kwargs)

        # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
        # Variables to help check whether mutation happens in calling the function
        # Copy the recursive list, tuple and map structure, but not base objects
        func_args_before = nest.pack_sequence_as(func_args,
                                                 nest.flatten(func_args))
        func_kwargs_before = nest.pack_sequence_as(func_kwargs,
                                                   nest.flatten(func_kwargs))

        def convert(x):
            """Converts a function output to a Tensor."""
            if x is None:
                return None
            if op_return_value is not None and isinstance(x, ops.Operation):
                # TODO(b/79881896): we currently can't capture external control deps, so
                # this won't work if x needs to be captured (i.e. if python_func returns
                # captured Operations).
                with ops.control_dependencies([x]):
                    x = array_ops.identity(op_return_value)
            else:
                try:
                    x = ops.convert_to_tensor_or_indexed_slices(x)
                except (ValueError, TypeError):
                    raise TypeError(
                        "To be compatible with tf.contrib.eager.defun, Python functions "
                        "must return zero or more Tensors; in compilation of %s, found "
                        "return value of type %s, which is not a Tensor."
                        % (str(python_func), type(x)))
            # `a` is the control-dependency manager entered above.
            if add_control_dependencies:
                x = a.mark_as_return(x)
            return x

        this_tape = tape.push_new_tape()
        try:
            if experimental_autograph:
                from tensorflow.python import autograph  # pylint: disable=g-import-not-at-top
                func_outputs = autograph.converted_call(
                    python_func, None,
                    autograph.ConversionOptions(
                        verbose=True,
                        recursive=True,
                        strip_decorators=(function.defun, ),
                        optional_features=(),
                    ), *func_args, **func_kwargs)
            else:
                func_outputs = python_func(*func_args, **func_kwargs)
            # invariant: `func_outputs` contains only Tensors and `None`s.
            func_outputs = nest.map_structure(convert, func_outputs)

            # Compare the argument structures against the snapshots taken
            # before the call.
            check_mutation(func_args_before, func_args)
            check_mutation(func_kwargs_before, func_kwargs)
        finally:
            # Always pop the tape and restore the scope's resource setting,
            # even when tracing raised.
            tape.pop_tape(this_tape)
            current_scope.set_use_resource(default_use_recource)

        # Variables in `func_args`, `func_kwargs` should be explicit inputs
        # to the function, not captured inputs.
        tape_variables = this_tape.watched_variables()
        arg_variables = set()
        inputs = []
        for arg in nest.flatten(func_args) + nest.flatten(func_kwargs):
            if isinstance(arg, resource_variable_ops.ResourceVariable):
                try:
                    # Move the variable's placeholder from the captures to
                    # the explicit inputs.
                    resource_placeholder = func_graph.captures.pop(arg.handle)
                    arg_variables.add(arg)
                except KeyError:
                    # This case occurs if a Variable among the inputs is not actually
                    # used by the function; we still add an explicit input for it
                    # because the user should presumably pass the Variable as an input
                    # to the corresponding graph function.
                    resource_placeholder = _create_substitute_placeholder(
                        arg.handle)
                inputs.append(resource_placeholder)
            elif isinstance(arg, ops.Tensor):
                inputs.append(arg)
        # Watched variables that were not passed as arguments.
        variables = [v for v in tape_variables if v not in arg_variables]
        # Explicit inputs first, remaining captures after them.
        func_graph.inputs = inputs + list(func_graph.captures.values())

        func_graph.structured_outputs = func_outputs
        # Returning a closed-over tensor does not trigger convert_to_tensor.
        func_graph.outputs.extend(
            func_graph.capture(x)
            for x in flatten(func_graph.structured_outputs) if x is not None)

        func_graph.variables = variables

    # Register any other functions defined in the graph.
    with ops.init_scope():
        if context.executing_eagerly():
            for f in func_graph._functions.values():  # pylint: disable=protected-access
                # TODO(ashankar): What about the gradient registry?
                context.add_function(f._c_func.func)  # pylint: disable=protected-access

    return func_graph
def _create_keras_history_helper(tensors, processed_ops, created_layers):
    """Helper method for `create_keras_history`.

    Walks the ops that produced `tensors`, recursing through the inputs that
    already use Keras history, and wraps every op not yet in `processed_ops`
    in a `TensorFlowOpLayer`. Op inputs without Keras history are stored on
    the layer as constants, keyed by input position.

    Args:
      tensors: A structure of Tensors for which to create Keras metadata.
      processed_ops: Set. TensorFlow operations that have already been wrapped
        in `TensorFlowOpLayer` instances.
      created_layers: List. The `TensorFlowOpLayer` instances created.

    Returns:
      Tuple. First element is the updated set of TensorFlow Operations that
      have been wrapped in `TensorFlowOpLayer` instances. Second element is
      a list of the `TensorFlowOpLayer` instances created.

    Raises:
      ValueError: If called while executing eagerly outside functions, or if
        any sparse or ragged tensor without Keras history was encountered.
    """
    if ops.executing_eagerly_outside_functions():
        raise ValueError(
            '`create_keras_history` should only be called if eager is disabled!'
        )
    # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
    # Cannot be imported at top because of circular dependencies.
    # TODO(omalleyt): Resolve circular dependency.
    from tensorflow.python.keras.engine import base_layer  # pylint: disable=g-import-not-at-top
    tensor_list = nest.flatten(tensors)
    # Unsupported tensors are collected and reported together after the loop.
    sparse_ops = []
    ragged_tensors = []
    for tensor in tensor_list:
        # Tensors that already carry Keras history need no work.
        if getattr(tensor, '_keras_history', None) is not None:
            continue
        if isinstance(
                tensor,
                (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
            sparse_ops.append(tensor.op)
            continue
        if tf_utils.is_ragged(tensor):
            # Ragged tensors don't have an op property
            ragged_tensors.append(tensor)
            continue
        op = tensor.op  # The Op that created this Tensor.
        if op not in processed_ops:
            # Recursively set `_keras_history`.
            op_inputs = list(op.inputs)
            constants = {}
            layer_inputs = []
            for i, op_input in enumerate(op_inputs):
                if uses_keras_history(op_input):
                    layer_inputs.append(op_input)
                else:
                    # Treat any value not originating from a `keras.Input` as
                    # a constant. Variables cannot be supported.
                    ds_with_session = (
                        distribution_strategy_context.in_cross_replica_context(
                        ) and not ops.executing_eagerly_outside_functions())
                    using_xla = control_flow_util.GraphOrParentsInXlaContext(
                        ops.get_default_graph())
                    if ds_with_session or using_xla or _UNSAFE_GRAPH_OP_LAYER_CREATION:
                        # In Legacy Graph mode, evaluating here makes Session be
                        # configured improperly. The downside of this is that saving
                        # via `get_config` breaks, but SavedModel still works.
                        constants[i] = op_input
                    else:
                        # Evaluate the input to a concrete value via a
                        # zero-argument backend function.
                        with ops.init_scope():
                            constants[i] = backend.function([], op_input)([])
            layer_inputs = unnest_if_single_tensor(layer_inputs)
            # Process the producers of this op's Keras inputs first.
            processed_ops, created_layers = _create_keras_history_helper(
                layer_inputs, processed_ops, created_layers)
            name = op.name
            node_def = op.node_def.SerializeToString()
            op_layer = base_layer.TensorFlowOpLayer(node_def,
                                                    constants=constants,
                                                    name=name)
            created_layers.append(op_layer)
            op_layer._set_connectivity_metadata(  # pylint: disable=protected-access
                args=(layer_inputs, ),
                kwargs={},
                outputs=op.outputs)
            processed_ops.update([op])
    if sparse_ops or ragged_tensors:
        # NOTE: this literal is interpolated into the error message below, so
        # its whitespace is runtime text and is left exactly as found.
        lambda_example = """ weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights) output = tf.keras.layers.Lambda(weights_mult)(input) """
        raise ValueError(
            'Tensorflow ops that generate ragged or sparse tensor '
            'outputs are currently not supported by Keras automatic '
            'op wrapping. Please wrap these ops in a Lambda layer: '
            '\n\n```\n{example}\n```\n'
            'Sparse ops encountered: {sparse_ops}\n'
            'Ragged tensors encountered: {ragged_tensors}\n'.format(
                example=lambda_example,
                sparse_ops=str(sparse_ops),
                ragged_tensors=str(ragged_tensors)))
    return processed_ops, created_layers
def load(export_dir, tags=None):
    """Load a SavedModel from `export_dir`.

    Signatures associated with the SavedModel are available as functions:

    ```python
    imported = tf.saved_model.load(path)
    f = imported.signatures["serving_default"]
    print(f(x=tf.constant([[1.]])))
    ```

    Objects exported with `tf.saved_model.save` additionally have
    checkpointable objects and functions assigned to attributes:

    ```python
    exported = tf.train.Checkpoint(v=tf.Variable(3.))
    exported.f = tf.function(
        lambda x: exported.v * x,
        input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)])
    tf.saved_model.save(exported, path)
    imported = tf.saved_model.load(path)
    assert 3. == imported.v.numpy()
    assert 6. == imported.f(x=tf.constant(2.)).numpy()
    ```

    When the export directory contains an object graph file and the
    SavedModel holds exactly one MetaGraph, the object-graph loader is used;
    every other SavedModel goes through the TF1-style loader.

    Args:
      export_dir: The SavedModel directory to load from.
      tags: A tag or sequence of tags identifying the MetaGraph to load.
        Optional if the SavedModel contains a single MetaGraph, as for those
        exported from `tf.saved_model.save`.

    Returns:
      A checkpointable object with a `signatures` attribute mapping from
      signature keys to functions. If the SavedModel was exported by
      `tf.saved_model.save`, it also points to checkpointable objects and
      functions which were attached to the exported object.

    Raises:
      ValueError: If `tags` don't match a MetaGraph in the SavedModel.
    """
    if tags is not None:
        # Supports e.g. tags=SERVING and tags=[SERVING]
        tags = nest.flatten(tags)
    saved_model_proto = loader_impl.parse_saved_model(export_dir)
    object_graph_filename = os.path.join(
        compat.as_bytes(export_dir),
        compat.as_bytes(constants.EXTRA_ASSETS_DIRECTORY),
        compat.as_bytes("object_graph.pb"))

    has_single_object_graph = (
        file_io.file_exists(object_graph_filename)
        and len(saved_model_proto.meta_graphs) == 1)
    if not has_single_object_graph:
        # Fall back to the TF1-style loader.
        with ops.init_scope():
            return load_v1_in_v2.load(export_dir, tags)

    meta_graph_def = saved_model_proto.meta_graphs[0]
    if tags is not None:
        if set(tags) != set(meta_graph_def.meta_info_def.tags):
            raise ValueError((
                "The SavedModel at {} has one MetaGraph with tags {}, but got an "
                "incompatible argument tags={} to tf.saved_model.load. You may omit "
                "it, pass 'None', or pass matching tags.").format(
                    export_dir, meta_graph_def.meta_info_def.tags, tags))
    object_graph_proto = _load_saved_object_graph_proto(object_graph_filename)
    with ops.init_scope():
        loader = _Loader(object_graph_proto, saved_model_proto, export_dir)
        return loader.get(0)
def maybe_init_scope():
    """Yield once, inside `ops.init_scope()` unless already eager outside functions.

    This is a generator with a single `yield`; it is presumably wrapped by a
    context-manager decorator that is not visible in this chunk -- confirm at
    the definition site.
    """
    if not ops.executing_eagerly_outside_functions():
        with ops.init_scope():
            yield
    else:
        # Already eager at the outermost level: no scope to lift out of.
        yield
def finite_differences(self, grads_and_vars, global_step, name, d_vars, g_vars,
                       d_grads, g_grads):
    """Attempt to directly compute the Hessian and apply equation (6).

    Finite-difference estimates of the second derivatives are built by
    saving the variables into "orig" slots, stepping them by
    ``self._lr_t * gradient`` (all variables, discriminator only, generator
    only), re-evaluating the gradients after each step and restoring the
    variables in between. The per-variable 2x2 matrix ``J`` of these
    estimates is combined with its inverse to form a correction ``_j`` and
    the applied gradient is ``alpha * original_gradient + beta * _j``.

    NOTE(review): "equation (6)" refers to a paper that is not identified in
    this file.

    Args:
        grads_and_vars: Iterable of ``(gradient, variable)`` pairs.
        global_step: Forwarded to ``self.optimizer.apply_gradients``.
        name: Forwarded to ``self.optimizer.apply_gradients``.
        d_vars: Ignored; recomputed from ``grads_and_vars``.
        g_vars: Ignored; recomputed from ``grads_and_vars``.
        d_grads: Ignored; recomputed from ``grads_and_vars``.
        g_grads: Ignored; recomputed from ``grads_and_vars``.

    Returns:
        The result of ``self.optimizer.apply_gradients`` on the adjusted
        gradients.

    Raises:
        ValueError: If a variable belongs to neither ``self.gan.d_vars()``
            nor ``self.gan.g_vars()``.
    """
    # Materialize once so that a generator argument survives the repeated
    # iteration below.
    grads_and_vars = list(grads_and_vars)
    d_grads = []
    g_grads = []
    d_vars = []
    g_vars = []
    alpha = 0.5
    if self.config.alpha is not None:
        alpha = self.gan.configurable_param(self.config.alpha)
    beta = 0.5
    if self.config.beta is not None:
        beta = self.gan.configurable_param(self.config.beta)

    # Look the variable lists up once instead of once per loop iteration.
    known_d_vars = self.gan.d_vars()
    known_g_vars = self.gan.g_vars()
    for grad, var in grads_and_vars:
        if var in known_d_vars:
            d_vars.append(var)
            d_grads.append(grad)
        elif var in known_g_vars:
            g_vars.append(var)
            g_grads.append(grad)
        else:
            # The original `raise ("...")` raised a str, which itself fails
            # with an unrelated TypeError; raise a real exception instead.
            raise ValueError("Couldn't find var in g_vars or d_vars")
    orig_grads = d_grads + g_grads
    all_vars = d_vars + g_vars

    def curl():
        """Step the variables once and extrapolate from the gradient change."""
        grads = tf.gradients(self.gan.trainer.d_loss, d_vars) + tf.gradients(
            self.gan.trainer.g_loss, g_vars)
        op3 = tf.group(*[
            tf.assign_sub(v, self._lr_t * grad)
            for grad, v in zip(grads, all_vars)
        ])
        with tf.get_default_graph().control_dependencies([op3]):

            def curlcombine(g1, g2):
                stepsize = self._lr_t
                return g1 - (g2 - g1) / stepsize

            new_grads = tf.gradients(self.gan.trainer.d_loss,
                                     d_vars) + tf.gradients(
                                         self.gan.trainer.g_loss, g_vars)
            g3s = [curlcombine(g1, g2) for g1, g2 in zip(grads, new_grads)]
            return g3s

    if self.config.method == 'curl':
        all_grads = curl()
        d_grads = all_grads[:len(d_vars)]
        g_grads = all_grads[len(d_vars):]
    all_grads = d_grads + g_grads

    with ops.init_scope():
        # One "orig" slot per variable, used to save and restore its value.
        for _, v in grads_and_vars:
            self._zeros_slot(v, "orig", self._name)

    v1 = [self.get_slot(v, "orig") for v in all_vars]

    restored_vars = all_vars
    tmp_vars = v1

    e1 = 0.0001
    e2 = 0.0001

    # Store the current variable values in the slots.
    save = tf.group(
        *[tf.assign(w, v) for w, v in zip(tmp_vars, restored_vars)])

    with tf.get_default_graph().control_dependencies([save]):
        # Copy the saved values back into the variables.
        restore = tf.group(
            *[tf.assign(w, v) for w, v in zip(restored_vars, tmp_vars)])
        # One gradient step on every variable.
        opboth = [
            tf.assign_sub(w, self._lr_t * v)
            for w, v in zip(all_vars, all_grads)
        ]
        with tf.get_default_graph().control_dependencies(
            [tf.group(*opboth)]):
            if self.config.method == "curl":
                gboth = curl()
            else:
                gboth = tf.gradients(self.loss[0], d_vars) + tf.gradients(
                    self.loss[1], g_vars)
            with tf.get_default_graph().control_dependencies([restore]):
                # Step only the discriminator variables.
                opd = opboth[:len(d_vars)]
                with tf.get_default_graph().control_dependencies(
                    [tf.group(*opd)]):
                    if self.config.method == "curl":
                        new_d_grads = curl()
                    else:
                        new_d_grads = tf.gradients(
                            self.loss[0], d_vars) + tf.gradients(
                                self.loss[1], g_vars)
                    with tf.get_default_graph().control_dependencies(
                        [restore]):
                        # Step only the generator variables.
                        opg = opboth[len(d_vars):]
                        with tf.get_default_graph().control_dependencies(
                            [tf.group(*opg)]):
                            if self.config.method == "curl":
                                new_g_grads = curl()
                            else:
                                new_g_grads = tf.gradients(
                                    self.loss[0], d_vars) + tf.gradients(
                                        self.loss[1], g_vars)
                            with tf.get_default_graph().control_dependencies(
                                [restore]):
                                new_grads = []
                                for _gboth, _gd, _gg, _g, _orig_g in zip(
                                        gboth, new_d_grads, new_g_grads,
                                        all_grads, orig_grads):
                                    a = (_gg - _g) / self._lr_t  # d2f/dx2i
                                    b = (_gboth - _gg) / (2 * self._lr_t) + (
                                        _gd - _g) / (2 * self._lr_t)  # d2f/dx1dx2
                                    c = (_gboth - _gd) / (2 * self._lr_t) + (
                                        _gg - _g) / (2 * self._lr_t)  # d2f/dx1dx2
                                    c = -c
                                    d = -(_gd - _g) / self._lr_t  # d2f/dx2j
                                    if self.config.form == 5:
                                        # Form 5 uses the same estimates
                                        # without negating c and d.
                                        a = (_gg - _g) / self._lr_t  # d2f/dx2i
                                        b = (_gboth - _gg) / (
                                            2 * self._lr_t) + (_gd - _g) / (
                                                2 * self._lr_t)  # d2f/dx1dx2
                                        c = (_gboth - _gd) / (
                                            2 * self._lr_t) + (_gg - _g) / (
                                                2 * self._lr_t)  # d2f/dx1dx2
                                        d = (_gd - _g) / self._lr_t  # d2f/dx2j
                                    J = np.array([[a, b], [c, d]])
                                    Jt = np.transpose(J)

                                    # 1e-8 keeps the division below defined
                                    # when the determinant is zero.
                                    det = a * d - b * c + 1e-8
                                    h_1_a = d / det
                                    h_1_b = -b / det
                                    h_1_c = -c / det
                                    h_1_d = a / det
                                    Jinv = np.array([[h_1_a, h_1_b],
                                                     [h_1_c, h_1_d]])
                                    _j = Jt[0][0] * Jinv[0][0] * _g + Jt[1][
                                        0] * Jinv[1][0] * _g + Jt[0][1] * Jinv[
                                            0][1] * _g + Jt[1][1] * Jinv[1][
                                                1] * _g

                                    new_grads.append(alpha * _orig_g +
                                                     beta * _j)

                                new_grads_and_vars = list(
                                    zip(new_grads, all_vars))
                                return self.optimizer.apply_gradients(
                                    new_grads_and_vars,
                                    global_step=global_step,
                                    name=name)
def _zero_debias(unbiased_var, value, decay):
    """Compute the delta required for a debiased Variable.

    All exponential moving averages initialized with Tensors are initialized
    to 0, and therefore are biased to 0. Variables initialized to 0 and used
    as EMAs are similarly biased. This function creates the debias update
    amount according to a scale factor, as in https://arxiv.org/abs/1412.6980.

    To demonstrate the bias that results from 0-initialization, take an EMA
    that was initialized to `0` with decay `b`. After `t` timesteps of seeing
    the constant `c`, the variable has the following value:

    ```
      EMA = 0*b^(t) + c*(1 - b)*b^(t-1) + c*(1 - b)*b^(t-2) + ...
          = c*(1 - b^t)
    ```

    To have the true value `c`, we would divide by the scale factor `1 - b^t`.

    In order to perform debiasing, two shadow variables are used. One keeps
    track of the biased estimate, and the other keeps track of the number of
    updates that have occurred.

    Args:
      unbiased_var: A Variable representing the current value of the unbiased
        EMA.
      value: A Tensor representing the most recent value.
      decay: A Tensor representing `1-decay` for the EMA.

    Returns:
      The amount that the unbiased variable should be updated. Computing this
      tensor will also update the shadow variables appropriately.
    """
    with variable_scope.variable_scope(
            unbiased_var.op.name,
            values=[unbiased_var, value, decay]) as scope, ops.colocate_with(
                unbiased_var):
        with ops.init_scope():
            biased_initializer = init_ops.zeros_initializer(
                dtype=unbiased_var.dtype)(unbiased_var.get_shape())
            local_step_initializer = init_ops.zeros_initializer()

        def _maybe_get_unique(name):
            """Get name for a unique variable, if not `reuse=True`."""
            current_scope = variable_scope.get_variable_scope()
            if current_scope.reuse:
                return name
            taken_names = [
                var.op.name for var in current_scope.global_variables()
            ]
            qualified_name = current_scope.name + "/" + name
            if qualified_name not in taken_names:
                return name
            # Probe "<name>_1", "<name>_2", ... until one is free.
            suffix = 1
            while qualified_name + ("_%d" % suffix) in taken_names:
                suffix += 1
            return name + ("_%d" % suffix)

        biased_var = variable_scope.get_variable(
            _maybe_get_unique("biased"),
            initializer=biased_initializer,
            trainable=False)
        local_step = variable_scope.get_variable(
            _maybe_get_unique("local_step"),
            shape=[],
            dtype=unbiased_var.dtype,
            initializer=local_step_initializer,
            trainable=False)

        # Get an update ops for both shadow variables.
        biased_delta = (biased_var - value) * decay
        update_biased = state_ops.assign_sub(
            biased_var, biased_delta, name=scope.name)
        update_local_step = local_step.assign_add(1)

        # Compute the value of the delta to update the unbiased EMA. Make sure
        # to use the new values of the biased variable and the local step.
        with ops.control_dependencies([update_biased, update_local_step]):
            biased_value = biased_var.read_value()
            # This function gets `1 - decay`, so use `1.0 - decay` in the
            # exponent.
            bias_correction = 1 - math_ops.pow(1.0 - decay,
                                               local_step.read_value())
            unbiased_ema_delta = unbiased_var - biased_value / bias_correction

        return unbiased_ema_delta
def save(obj, export_dir, signatures=None):
    # pylint: disable=line-too-long
    """Exports the Checkpointable object `obj` to [SavedModel format](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md).

    Example usage:

    ```python
    class Adder(tf.train.Checkpoint):

      @tf.function(input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)])
      def add(self, x):
        return x + x + 1.

    to_export = Adder()
    tf.saved_model.save(to_export, '/tmp/adder')
    ```

    The resulting SavedModel is then servable with an input named "x", its
    value having any shape and dtype float32.

    The optional `signatures` argument controls which methods in `obj` will be
    available to programs which consume `SavedModel`s, for example serving
    APIs. Python functions may be decorated with
    `@tf.function(input_signature=...)` and passed as signatures directly, or
    lazily with a call to `get_concrete_function` on the method decorated with
    `@tf.function`.

    If the `signatures` argument is omitted, `obj` will be searched for
    `@tf.function`-decorated methods. If exactly one `@tf.function` is found,
    that method will be used as the default signature for the SavedModel. This
    behavior is expected to change in the future, when a corresponding
    `tf.saved_model.load` symbol is added. At that point signatures will be
    completely optional, and any `@tf.function` attached to `obj` or its
    dependencies will be exported for use with `load`.

    When invoking a signature in an exported SavedModel, `Tensor` arguments
    are identified by name. These names will come from the Python function's
    argument names by default. They may be overridden by specifying a
    `name=...` argument in the corresponding `tf.TensorSpec` object. Explicit
    naming is required if multiple `Tensor`s are passed through a single
    argument to the Python function.

    The outputs of functions used as `signatures` must either be flat lists,
    in which case outputs will be numbered, or a dictionary mapping string
    keys to `Tensor`, in which case the keys will be used to name outputs.

    Since `tf.keras.Model` objects are also Checkpointable, this function can
    be used to export Keras models. For example, exporting with a signature
    specified:

    ```python
    class Model(tf.keras.Model):

      @tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
      def serve(self, serialized):
        ...

    m = Model()
    tf.saved_model.save(m, '/tmp/saved_model/')
    ```

    Exporting from a function without a fixed signature:

    ```python
    class Model(tf.keras.Model):

      @tf.function
      def call(self, x):
        ...

    m = Model()
    tf.saved_model.save(
        m, '/tmp/saved_model/',
        signatures=m.call.get_concrete_function(
            tf.TensorSpec(shape=[None, 3], dtype=tf.float32, name="inp")))
    ```

    `tf.keras.Model` instances constructed from inputs and outputs already
    have a signature and so do not require a `@tf.function` decorator or a
    `signatures` argument. If neither are specified, the model's forward pass
    is exported.

    ```python
    x = input_layer.Input((4,), name="x")
    y = core.Dense(5, name="out")(x)
    model = training.Model(x, y)
    tf.saved_model.save(model, '/tmp/saved_model/')
    # The exported SavedModel takes "x" with shape [None, 4] and returns "out"
    # with shape [None, 5]
    ```

    Variables must be tracked by assigning them to an attribute of a tracked
    object or to an attribute of `obj` directly. TensorFlow objects (e.g.
    layers from `tf.keras.layers`, optimizers from `tf.train`) track their
    variables automatically. This is the same tracking scheme that
    `tf.train.Checkpoint` uses, and an exported `Checkpoint` object may be
    restored as a training checkpoint by pointing
    `tf.train.Checkpoint.restore` to the SavedModel's "variables/"
    subdirectory. Currently variables are the only stateful objects supported
    by `tf.saved_model.save`, but others (e.g. tables) will be supported in
    the future.

    `tf.function` does not hard-code device annotations from outside the
    function body, instead using the calling context's device. This means for
    example that exporting a model which runs on a GPU and serving it on a CPU
    will generally work, with some exceptions. `tf.device` annotations inside
    the body of the function will be hard-coded in the exported model; this
    type of annotation is discouraged. Device-specific operations, e.g. with
    "cuDNN" in the name or with device-specific layouts, may cause issues.
    Currently a `DistributionStrategy` is another exception: active
    distribution strategies will cause device placements to be hard-coded in a
    function. Exporting a single-device computation and importing under a
    `DistributionStrategy` is not currently supported, but may be in the
    future.

    SavedModels exported with `tf.saved_model.save` [strip default-valued
    attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes)
    automatically, which removes one source of incompatibilities when the
    consumer of a SavedModel is running an older TensorFlow version than the
    producer. There are however other sources of incompatibilities which are
    not handled automatically, such as when the exported model contains
    operations which the consumer does not have definitions for.

    The current implementation of `tf.saved_model.save` targets serving
    use-cases, but omits information which will be necessary for the planned
    future implementation of `tf.saved_model.load`. Exported models using the
    current `save` implementation, and other existing SavedModels, will not be
    compatible with `tf.saved_model.load` when it is implemented. Further,
    `save` will in the future attempt to export `@tf.function`-decorated
    methods which it does not currently inspect, so some objects which are
    exportable today will raise exceptions on export in the future (e.g. due
    to complex/non-serializable default arguments). Such
    backwards-incompatible API changes are expected only prior to the
    TensorFlow 2.0 release.

    Args:
      obj: A checkpointable object to export.
      export_dir: A directory in which to write the SavedModel.
      signatures: Optional, either a `tf.function` with an input signature
        specified or the result of `f.get_concrete_function` on a
        `@tf.function`-decorated function `f`, in which case `f` will be used
        to generate a signature for the SavedModel under the default serving
        signature key. `signatures` may also be a dictionary, in which case it
        maps from signature keys to either `tf.function` instances with input
        signatures or concrete functions. The keys of such a dictionary may be
        arbitrary strings, but will typically be from the
        `tf.saved_model.signature_constants` module.

    Raises:
      ValueError: If `obj` is not checkpointable.
      AssertionError: If called when not executing eagerly (while graph
        building, or from inside a traced `@tf.function`).

    @compatibility(eager)
    Not supported when graph building. From TensorFlow 1.x,
    `tf.enable_eager_execution()` must run first. May not be called from
    within a function body.
    @end_compatibility
    """
    if not context.executing_eagerly():
        with ops.init_scope():
            # Eager in the init scope means we are inside a traced function;
            # otherwise this is plain graph building.
            if context.executing_eagerly():
                unsupported_reason = (
                    "tf.saved_model.save is not supported inside a traced "
                    "@tf.function. Move the call to the outer eagerly-executed "
                    "context.")
            else:
                unsupported_reason = (
                    "tf.saved_model.save is not supported when graph building. "
                    "tf.enable_eager_execution() must run first when calling it from "
                    "TensorFlow 1.x.")
            raise AssertionError(unsupported_reason)
    # pylint: enable=line-too-long
    if not isinstance(obj, base.CheckpointableBase):
        raise ValueError(
            "Expected a Checkpointable object for export, got {}.".format(obj))

    if signatures is None:
        # Note that we run this before saving the checkpoint, since looping
        # over attributes may have the side effect of creating variables in
        # some cases.
        signatures = _find_function_to_export(obj)
    signatures = _canonicalize_signatures(signatures)

    # TODO(allenl): Factor out some subset of SavedModelBuilder which is 2.x
    # compatible (no sessions) and share it with this export API rather than
    # making a SavedModel proto and writing it directly.
    saved_model_proto = saved_model_pb2.SavedModel()
    meta_graph_def = saved_model_proto.meta_graphs.add()
    object_saver = util.CheckpointableSaver(obj)
    asset_info = _fill_meta_graph_def(meta_graph_def, obj, signatures,
                                      object_saver)
    saved_model_proto.saved_model_schema_version = (
        constants.SAVED_MODEL_SCHEMA_VERSION)

    # So far we've just been generating protocol buffers with no I/O. Now we
    # write the checkpoint, copy assets into the assets directory, and write
    # out the SavedModel proto itself.
    utils_impl.get_or_create_variables_dir(export_dir)
    object_saver.save(utils_impl.get_variables_path(export_dir))
    builder_impl.copy_assets_to_destination_dir(asset_info.asset_filename_map,
                                                export_dir)
    saved_model_path = os.path.join(
        compat.as_bytes(export_dir),
        compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
    serialized_saved_model = saved_model_proto.SerializeToString()
    file_io.write_string_to_file(saved_model_path, serialized_saved_model)
    _write_object_graph(obj, export_dir, asset_info.asset_index)
def _state_callback_wrapper():
    """Call `state_callback` inside `ops.init_scope()` and return its result.

    `state_callback` is a free variable resolved from the enclosing scope,
    which is not visible in this chunk.
    """
    with ops.init_scope():
        callback_result = state_callback()
    return callback_result
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Apply gradients to variables.

  This is the second part of `minimize()`. It returns an `Operation` that
  applies gradients.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`. Pairs whose gradient is `None` are skipped.
    global_step: Optional `Variable` to increment by one after the
      variables have been updated.
    name: Optional name for the returned operation.  Default to the
      name passed to the `Optimizer` constructor.

  Returns:
    An `Operation` that applies the specified gradients. If `global_step`
    was not None, that operation also increments `global_step`.

  Raises:
    TypeError: If a gradient cannot be converted to a `Tensor` or
      `IndexedSlices`.
    ValueError: If `grads_and_vars` is empty, or if none of the variables
      have gradients.
  """
  # This is a default implementation of apply_gradients() that can be shared
  # by most optimizers.  It relies on the subclass implementing the following
  # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().

  grads_and_vars = tuple(grads_and_vars)  # Make sure repeat iteration works.
  if not grads_and_vars:
    raise ValueError("No variables provided.")
  converted_grads_and_vars = []
  for g, v in grads_and_vars:
    if g is not None:
      try:
        # Convert the grad to Tensor or IndexedSlices if necessary.
        g = ops.convert_to_tensor_or_indexed_slices(g)
      except TypeError:
        # `(g,)` keeps the formatting safe when `g` itself is a tuple.
        raise TypeError(
            "Gradient must be convertible to a Tensor"
            " or IndexedSlices, or None: %s" % (g,))
      if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
        raise TypeError(
            "Gradient must be a Tensor, IndexedSlices, or None: %s" % (g,))
    p = _get_processor(v)
    converted_grads_and_vars.append((g, v, p))

  converted_grads_and_vars = tuple(converted_grads_and_vars)
  var_list = [v for g, v, _ in converted_grads_and_vars if g is not None]
  if not var_list:
    # Entries are (grad, var, processor): report the variables, which sit in
    # the middle position, rather than the processor objects.
    raise ValueError("No gradients provided for any variable: %s." %
                     ([str(v) for _, v, _ in converted_grads_and_vars],))
  # Slots are created inside init_scope so that they are lifted out of any
  # enclosing function-building context.
  with ops.init_scope():
    self._create_slots([_get_variable_for(v) for v in var_list])
  update_ops = []
  with ops.name_scope(name, self._name) as name:
    self._prepare()
    for grad, var, processor in converted_grads_and_vars:
      if grad is None:
        continue
      # We colocate all ops created in _apply_dense or _apply_sparse
      # on the same device as the variable.
      # TODO(apassos): figure out how to get the variable name here.
      scope_name = var.op.name if context.in_graph_mode() else ""
      with ops.name_scope("update_" + scope_name), ops.colocate_with(var):
        update_ops.append(processor.update_op(self, grad))
    if global_step is None:
      apply_updates = self._finish(update_ops, name)
    else:
      # The step counter is only incremented after every update has finished.
      with ops.control_dependencies([self._finish(update_ops, "update")]):
        with ops.colocate_with(global_step):
          if isinstance(global_step, resource_variable_ops.ResourceVariable):
            # TODO(apassos): the implicit read in assign_add is slow; consider
            # making it less so.
            apply_updates = resource_variable_ops.assign_add_variable_op(
                global_step.handle,
                ops.convert_to_tensor(1, dtype=global_step.dtype),
                name=name)
          else:
            apply_updates = state_ops.assign_add(global_step, 1, name=name)

    if context.in_graph_mode():
      # Register the op (not the tensor) in the TRAIN_OP collection once.
      if isinstance(apply_updates, ops.Tensor):
        apply_updates = apply_updates.op
      train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
      if apply_updates not in train_op:
        train_op.append(apply_updates)

    return apply_updates
def func_graph_from_py_func(name,
                            python_func,
                            args,
                            kwargs,
                            signature=None,
                            func_graph=None,
                            autograph=False,
                            autograph_options=None,
                            add_control_dependencies=True,
                            arg_names=None,
                            op_return_value=None,
                            collections=None,
                            capture_by_value=None,
                            override_flat_arg_shapes=None):
  """Returns a `FuncGraph` generated from `python_func`.

  Args:
    name: an identifier for the function.
    python_func: the Python function to trace.
    args: the positional args with which the Python function should be called;
      ignored if a signature is provided.
    kwargs: the keyword args with which the Python function should be called;
      ignored if a signature is provided.
    signature: a possibly nested sequence of `TensorSpecs` specifying the shapes
      and dtypes of the arguments. When a signature is provided, `args` and
      `kwargs` are ignored, and `python_func` is traced with Tensors conforming
      to `signature`. If `None`, the shapes and dtypes are inferred from the
      inputs.
    func_graph: Optional. An instance of FuncGraph. If provided, we will use
      this graph else a new one is built and returned.
    autograph: whether to use autograph to compile `python_func`.
      See https://www.tensorflow.org/guide/autograph for more information.
    autograph_options: additional knobs to control when `autograph=True`.
      See https://www.tensorflow.org/guide/autograph for more information.
    add_control_dependencies: If True, automatically adds control dependencies
      to ensure program order matches execution order and stateful ops always
      execute.
    arg_names: Optional list of argument names, used to give input placeholders
      recognizable names.
    op_return_value: Optional. A Tensor. If set and `python_func` returns
      Operations, those return values will be replaced with this value. If not
      set, returning an Operation triggers an error.
    collections: a dictionary of collections this FuncGraph should start
      with. If not specified (None), the FuncGraph will read (but not write to)
      the outer graph's collections that are not whitelisted, and both
      read and write to the outer graph's collections that are whitelisted.
      The current whitelisted collections are the global variables, the
      local variables, and the trainable variables.
      Defaults to None.
    capture_by_value: An optional boolean. If True, the func graph will capture
      Variables by value instead of reference. By default inherit from outer
      graphs, and failing that will default to False.
    override_flat_arg_shapes: An optional list of instances that are either
      `None` or `TensorShape`.  The length must match that of
      `nest.flatten((args, kwargs))`.  The entries containing value `None`
      must match entries in flattened arguments containing non-tensors, while
      entries containing a `TensorShape` must match entries in the flattened
      arguments containing tensors.

  Returns:
    A FuncGraph.

  Raises:
    TypeError: If any of `python_func`'s return values is neither `None` nor a
      `Tensor`.
    ValueError: If both `signature` and `override_flat_arg_shapes` are
      passed in.
  """
  if op_return_value is not None:
    assert isinstance(op_return_value, ops.Tensor), op_return_value
  if func_graph is None:
    func_graph = FuncGraph(name, collections=collections,
                           capture_by_value=capture_by_value)
  assert isinstance(func_graph, FuncGraph)
  if add_control_dependencies:
    control_manager = AutomaticControlDependencies()
  else:
    control_manager = ops.NullContextmanager()
  # `a` is whatever the control manager yields on entry; it is only used
  # (through `mark_as_return`) when `add_control_dependencies` is true.
  with func_graph.as_default(), control_manager as a:
    current_scope = variable_scope.get_variable_scope()
    # Remember the scope's previous `use_resource` flag (local name is spelled
    # "recource"); it is restored in the `finally` clause further down.
    default_use_recource = current_scope.use_resource
    current_scope.set_use_resource(True)

    # NOTE(review): this check runs after `set_use_resource(True)` and outside
    # the try/finally below, so raising here does not restore the flag.
    if signature is not None and override_flat_arg_shapes is not None:
      raise ValueError(
          "Passed both signature and override_flat_arg_shapes: %s and %s."
          % (signature, override_flat_arg_shapes))

    # A signature replaces the concrete call arguments entirely.
    if signature is not None:
      args = signature
      kwargs = {}

    # Creates and names placeholders for all arguments.
    if override_flat_arg_shapes is not None:
      # The override list covers flattened args first, then flattened kwargs.
      flat_args = nest.flatten(args)
      arg_shapes = override_flat_arg_shapes[:len(flat_args)]
      kwarg_shapes = override_flat_arg_shapes[len(flat_args):]
    else:
      arg_shapes = None
      kwarg_shapes = None
    func_args = _get_defun_inputs_from_args(
        args, arg_names, flat_shapes=arg_shapes)
    func_kwargs = _get_defun_inputs_from_kwargs(
        kwargs, flat_shapes=kwarg_shapes)

    # Convert all Tensors into TensorSpecs before saving the structured inputs.
    # If storing pure concrete functions that are not called through polymorphic
    # functions, we don't have access to FunctionSpec, so we need to call the
    # TensorSpecs by their `arg_names` for later binding.
    func_graph.structured_input_signature = (
        convert_structure_to_signature(func_args, arg_names),
        convert_structure_to_signature(func_kwargs))

    flat_func_args = nest.flatten(func_args)
    flat_func_kwargs = nest.flatten(func_kwargs)
    # Temporarily set inputs to allow graph building code to inspect
    # them. Reassigned below.
    func_graph.inputs = [arg for arg in flat_func_args + flat_func_kwargs
                         if isinstance(arg, ops.Tensor)]

    # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
    # Variables to help check whether mutation happens in calling the function
    # Copy the recursive list, tuple and map structure, but not base objects
    func_args_before = nest.pack_sequence_as(func_args, flat_func_args)
    func_kwargs_before = nest.pack_sequence_as(
        func_kwargs, flat_func_kwargs)

    def convert(x):
      """Converts a function output to a Tensor."""
      if x is None:
        return None
      if op_return_value is not None and isinstance(x, ops.Operation):
        # TODO(b/79881896): we currently can't capture external control deps, so
        # this won't work if x needs to be captured (i.e. if python_func returns
        # captured Operations).
        with ops.control_dependencies([x]):
          x = array_ops.identity(op_return_value)
      elif not isinstance(x, tensor_array_ops.TensorArray):
        try:
          x = ops.convert_to_tensor_or_composite(x)
        except (ValueError, TypeError):
          raise TypeError(
              "To be compatible with tf.contrib.eager.defun, Python functions "
              "must return zero or more Tensors; in compilation of %s, found "
              "return value of type %s, which is not a Tensor." %
              (str(python_func), type(x)))
      if add_control_dependencies:
        x = a.mark_as_return(x)
      return x

    # A fresh tape is pushed for the duration of the trace; the variables it
    # watched are read back after it is popped.
    this_tape = tape.push_new_tape()
    try:
      if autograph:
        # This import rebinds the local name `autograph` (previously the
        # boolean parameter) to the autograph module.
        from tensorflow.python import autograph  # pylint: disable=g-import-not-at-top
        _, original_func = tf_decorator.unwrap(python_func)

        def wrapper(*args, **kwargs):
          # Note: functions annotated with @tf.function should always be
          # converted even though they would meet autograph's whitelisting
          # criteria.
          # If this assumption is ever broken, converted_call will need to
          # handle the possibility of original_func still being a shim, e.g.
          # bound to WeakrefSelf.
          return autograph.converted_call(
              original_func, None,
              autograph.ConversionOptions(
                  verbose=autograph.Verbosity.BRIEF,
                  recursive=True,
                  strip_decorators=(def_function.function,),
                  optional_features=autograph_options,
                  force_conversion=True,
              ), args, kwargs)

        # Wrapping around a decorator allows checks like tf_inspect.getargspec
        # to be accurate.
        converted_func = tf_decorator.make_decorator(original_func, wrapper)
        python_func = tf_decorator.rewrap(python_func, original_func,
                                          converted_func)

      func_outputs = python_func(*func_args, **func_kwargs)

      # invariant: `func_outputs` contains only Tensors, IndexedSlices,
      # SparseTensors, TensorArrays and `None`s.
      func_outputs = nest.map_structure(convert, func_outputs)

      check_mutation(func_args_before, func_args)
      check_mutation(func_kwargs_before, func_kwargs)
    finally:
      # Always undo the tracing-time state, even if tracing raised.
      tape.pop_tape(this_tape)
      current_scope.set_use_resource(default_use_recource)

    # Variables in `func_args`, `func_kwargs` should be explicit inputs
    # to the function, not captured inputs.
    tape_variables = this_tape.watched_variables()
    arg_variables = set()
    inputs = []
    for arg in nest.flatten(func_args) + nest.flatten(func_kwargs):
      if isinstance(arg, resource_variable_ops.ResourceVariable):
        # Even if an argument variable was not used in the function, we've
        # already manually captured the resource Tensor when creating argument
        # placeholders.
        resource_placeholder = func_graph.captures.pop(arg.handle, None)
        if resource_placeholder is None:
          continue
        arg_variables.add(arg)
        inputs.append(resource_placeholder)
      elif isinstance(arg, ops.Tensor):
        inputs.append(arg)
    # Only variables that were watched but not passed as arguments are kept.
    variables = [v for v in tape_variables if v not in arg_variables]
    # Final input list: explicit inputs first, then the remaining captures.
    func_graph.inputs = inputs + list(func_graph.captures.values())

    func_graph.structured_outputs = func_outputs
    # Returning a closed-over tensor does not trigger convert_to_tensor.
    func_graph.outputs.extend(
        func_graph.capture(x)
        for x in flatten(func_graph.structured_outputs)
        if x is not None)

    func_graph.variables = variables

  if add_control_dependencies:
    func_graph.control_outputs.extend(control_manager.ops_which_must_run)

  # Register any other functions defined in the graph.
  with ops.init_scope():
    if context.executing_eagerly():
      for f in func_graph._functions.values():  # pylint: disable=protected-access
        # TODO(ashankar): What about the gradient registry?
        context.add_function(f._c_func.func)  # pylint: disable=protected-access

  return func_graph
def _init_from_args(self,
                    initial_value=None,
                    trainable=True,
                    collections=None,
                    validate_shape=True,
                    caching_device=None,
                    name=None,
                    dtype=None,
                    constraint=None):
  """Creates a variable.

  Args:
    initial_value: A `Tensor`, or Python object convertible to a `Tensor`,
      which is the initial value for the Variable. The initial value must have
      a shape specified unless `validate_shape` is set to False. Can also be a
      callable with no argument that returns the initial value when called.
      (Note that initializer functions from init_ops.py must first be bound
       to a shape before being used here.)
    trainable: If `True`, the default, also adds the variable to the graph
      collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as
      the default list of variables to use by the `Optimizer` classes.
    collections: List of graph collections keys. The new variable is added to
      these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
    validate_shape: Ignored. Provided for compatibility with tf.Variable.
      (This method never reads the argument.)
    caching_device: Optional device string or function describing where the
      Variable should be cached for reading.  Defaults to the Variable's
      device.  If not `None`, caches on another device.  Typical use is to
      cache on the device where the Ops using the Variable reside, to
      deduplicate copying through `Switch` and other conditional statements.
    name: Optional name for the variable. Defaults to `'Variable'` and gets
      uniquified automatically.
    dtype: If set, initial_value will be converted to the given type.
      If None, either the datatype will be kept (if initial_value is
     a Tensor) or float32 will be used (if it is a Python object convertible
     to a Tensor).
    constraint: An optional projection function to be applied to the variable
      after being updated by an `Optimizer` (e.g. used to implement norm
      constraints or value constraints for layer weights). The function must
      take as input the unprojected Tensor representing the value of the
      variable and return the Tensor for the projected value
      (which must have the same shape). Constraints are not safe to
      use when doing asynchronous distributed training.

  Raises:
    ValueError: If `initial_value` is None, if `collections` is not a list,
      tuple, or set, if `constraint` is given but not callable, or if (in
      graph mode) a non-callable initial value comes from inside a
      control-flow construct.

  @compatibility(eager)
  When Eager Execution is enabled, variables are never added to collections.
  It is not implicitly added to the `GLOBAL_VARIABLES` or
  `TRAINABLE_VARIABLES` collections, and the `collections` argument is
  ignored.
  @end_compatibility
  """
  if initial_value is None:
    raise ValueError("initial_value must be specified.")
  init_from_fn = callable(initial_value)

  if collections is None:
    collections = [ops.GraphKeys.GLOBAL_VARIABLES]
  if not isinstance(collections, (list, tuple, set)):
    raise ValueError(
        "collections argument to Variable constructor must be a list, tuple, "
        "or set. Got %s of type %s" % (collections, type(collections)))
  if constraint is not None and not callable(constraint):
    raise ValueError("The `constraint` argument must be a callable.")

  # A checkpoint-provided initial value is unwrapped here; its restore uid is
  # recorded on the variable first.
  if isinstance(initial_value, checkpointable.CheckpointInitialValue):
    self._maybe_initialize_checkpointable()
    self._update_uid = initial_value.checkpoint_position.restore_uid
    initial_value = initial_value.wrapped_value

  self._trainable = trainable
  if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections:
    # Build a new list so the caller's `collections` object is not mutated.
    collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES]
  self._save_slice_info = None
  # Store the graph key so optimizers know how to only retrieve variables from
  # this graph.
  self._graph_key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
  with ops.init_scope():
    # Graph vs. eager mode is decided inside init_scope, not in the caller's
    # context.
    self._in_graph_mode = not context.executing_eagerly()
    with ops.name_scope(name, "Variable", []
                        if init_from_fn else [initial_value]) as name:
      # pylint: disable=protected-access
      handle_name = ops._name_from_scope_name(name)
      if self._in_graph_mode:
        shared_name = handle_name
      else:
        # When in eager mode use a uid for the shared_name, to prevent
        # accidental sharing.
        shared_name = "%s_%d" % (handle_name, ops.uid())
      if init_from_fn:
        # Use attr_scope and device(None) to simulate the behavior of
        # colocate_with when the variable we want to colocate with doesn't
        # yet exist.
        if self._in_graph_mode:
          attr = attr_value_pb2.AttrValue(
              list=attr_value_pb2.AttrValue.ListValue(
                  s=[compat.as_bytes("loc:@%s" % handle_name)]))
          with ops.get_default_graph()._attr_scope({"_class": attr}):
            with ops.name_scope("Initializer"), ops.device(None):
              initial_value = ops.convert_to_tensor(
                  initial_value(), name="initial_value", dtype=dtype)
            self._handle = _eager_safe_variable_handle(
                shape=initial_value.get_shape(),
                dtype=initial_value.dtype.base_dtype,
                shared_name=shared_name,
                name=name,
                graph_mode=self._in_graph_mode)
            self._shape = initial_value.get_shape()
        else:
          initial_value = initial_value()
          with ops.name_scope("Initializer"):
            initial_value = ops.convert_to_tensor(
                initial_value, name="initial_value", dtype=dtype)
          self._handle = _eager_safe_variable_handle(
              shape=initial_value.get_shape(),
              dtype=initial_value.dtype.base_dtype,
              shared_name=shared_name,
              name=name,
              graph_mode=False)
          self._shape = initial_value.get_shape()
      # pylint: enable=protected-access

      # Or get the initial value from a Tensor or Python object.
      else:
        with ops.name_scope("Initializer"):
          initial_value = ops.convert_to_tensor(
              initial_value, name="initial_value", dtype=dtype)
        # pylint: disable=protected-access
        if (self._in_graph_mode and initial_value is not None and
            initial_value.op._get_control_flow_context() is not None):
          raise ValueError(
              "Initializer for variable %s is from inside a control-flow "
              "construct, such as a loop or conditional. When creating a "
              "variable inside a loop or conditional, use a lambda as the "
              "initializer." % name)
        # pylint: enable=protected-access
        self._handle = _eager_safe_variable_handle(
            shape=initial_value.get_shape(),
            dtype=initial_value.dtype.base_dtype,
            shared_name=shared_name,
            name=name,
            graph_mode=self._in_graph_mode)
        self._shape = initial_value.get_shape()

      self._unique_id = shared_name
      # The initial value tensor is only retained in graph mode.
      self._initial_value = initial_value if self._in_graph_mode else None
      self._handle_name = handle_name + ":0"
      self._dtype = initial_value.dtype.base_dtype
      self._constraint = constraint

      if self._in_graph_mode:
        with ops.name_scope("IsInitialized"):
          self._is_initialized_op = (
              gen_resource_variable_ops.var_is_initialized_op(self._handle))
        if initial_value is not None:
          with ops.name_scope("Assign") as n, ops.colocate_with(self._handle):
            self._initializer_op = (
                gen_resource_variable_ops.assign_variable_op(
                    self._handle,
                    self._try_guard_against_uninitialized_dependencies(
                        initial_value),
                    name=n))
        with ops.name_scope("Read"), ops.colocate_with(self._handle):
          # Manually assign reads to the handle's device to avoid log
          # messages.
          with ops.device(self._handle.device):
            value = self._read_variable_op()
          self._graph_element = value
          if caching_device is not None:
            # Variables may be created in a tf.device() or ops.colocate_with()
            # context. At the same time, users would expect caching device to
            # be independent of this context, and/or would not expect the
            # current device context to be merged with the caching device
            # spec.  Therefore we reset the colocation stack before creating
            # the cached value. Note that resetting the colocation stack will
            # also reset the device stack.
            with ops.colocate_with(None, ignore_existing=True):
              with ops.device(caching_device):
                self._cached_value = array_ops.identity(value)
          else:
            self._cached_value = None
      else:
        # Eager mode: assign immediately; no initializer/is-initialized ops
        # are kept.
        gen_resource_variable_ops.assign_variable_op(self._handle,
                                                     initial_value)
        self._is_initialized_op = None
        self._initializer_op = None
        self._graph_element = None
        # NOTE(review): this branch tests `caching_device` for truthiness,
        # whereas the graph-mode branch above tests `is not None`.
        if caching_device:
          with ops.device(caching_device):
            self._cached_value = self._read_variable_op()
        else:
          self._cached_value = None
  # NOTE(review): the original indentation is not recoverable from this
  # listing; this check is assumed to run outside `init_scope` -- confirm.
  if not context.executing_eagerly():
    ops.add_to_collections(collections, self)
  elif ops.GraphKeys.GLOBAL_STEP in collections:
    ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self)

  if not self._in_graph_mode:
    # After the handle has been created, set up a way to clean it up when
    # executing eagerly. We'll hold the only reference to the deleter, so that
    # when this object is garbage collected the deleter will be too. This
    # means ResourceVariables can be part of reference cycles without those
    # cycles being uncollectable, and means that no __del__ will be defined at
    # all in graph mode.
    self._handle_deleter = EagerResourceDeleter(
        handle=self._handle, handle_device=self._handle.device)
  self._cached_shape_as_list = None
def iterations(self):
  """Returns the optimizer's 'iterations' non-slot variable.

  The lookup happens inside `ops.init_scope()`. When executing eagerly there,
  the variable is looked up with `graph=None`; otherwise it is looked up for
  the default graph.
  """
  with ops.init_scope():
    if context.executing_eagerly():
      owning_graph = None
    else:
      owning_graph = ops.get_default_graph()
    return self._get_non_slot_variable('iterations', graph=owning_graph)
def apply(self, var_list=None):
  """Creates shadow variables for `var_list` and returns their update op.

  For every element of `var_list` a shadow ("average") slot is created inside
  `ops.init_scope()` and recorded in `self._averages`:

  * `Variable` objects get a slot initialized from the variable's
    `initialized_value()`, colocated with the variable; the variable is also
    added to the `GraphKeys.MOVING_AVERAGE_VARIABLES` collection.
  * Any other object (e.g. a `Tensor`) gets a zero-initialized slot, which is
    zero-debiased when `self._zero_debias` is set.

  If `self._num_updates` is set, the decay actually used is
  `min(decay, (1 + num_updates) / (10 + num_updates))`.

  `apply()` may be called several times with different lists, but not twice
  for the same element.

  Args:
    var_list: A list of Variable or Tensor objects of type float16, float32,
      or float64. Defaults to `variables.trainable_variables()`.

  Returns:
    An Operation that updates all the moving averages for `var_list`.

  Raises:
    TypeError: If an element is not float16, float32, or float64.
    ValueError: If a moving average already exists for an element.
  """
  # TODO(touts): op_scope
  if var_list is None:
    var_list = variables.trainable_variables()

  allowed_dtypes = (dtypes.float16, dtypes.float32, dtypes.float64)
  variable_op_types = ("Variable", "VariableV2", "VarHandleOp")
  debiased_slots = set()  # shadow slots that need `zero_debias=True`

  for item in var_list:
    if item.dtype.base_dtype not in allowed_dtypes:
      raise TypeError("The variables must be half, float, or double: %s" %
                      item.name)
    if item in self._averages:
      raise ValueError("Moving average already computed for: %s" % item.name)

    # Variables keep their average on the same device to lower cross-device
    # traffic; other tensors rely on the existing device allocation mechanism.
    with ops.init_scope():
      if not isinstance(item, variables.Variable):
        shadow = slot_creator.create_zeros_slot(
            item,
            self._name,
            colocate_with_primary=(item.op.type in variable_op_types))
        if self._zero_debias:
          debiased_slots.add(shadow)
      else:
        shadow = slot_creator.create_slot(
            item,
            item.initialized_value(),
            self._name,
            colocate_with_primary=True)
        # NOTE(mrry): only `tf.Variable` objects go into the
        # `MOVING_AVERAGE_VARIABLES` collection.
        ops.add_to_collection(ops.GraphKeys.MOVING_AVERAGE_VARIABLES, item)
    self._averages[item] = shadow

  with ops.name_scope(self._name) as scope:
    decay = ops.convert_to_tensor(self._decay, name="decay")
    if self._num_updates is not None:
      num_updates = math_ops.cast(
          self._num_updates, dtypes.float32, name="num_updates")
      warmup_decay = (1.0 + num_updates) / (10.0 + num_updates)
      decay = math_ops.minimum(decay, warmup_decay)

    updates = []
    for item in var_list:
      shadow = self._averages[item]
      updates.append(
          assign_moving_average(
              shadow, item, decay, zero_debias=shadow in debiased_slots))
    return control_flow_ops.group(*updates, name=scope)
def apply_gradients(self, grads_and_vars, name=None):
  """Applies gradients to variables and advances the iteration counter.

  This is the second part of `minimize()`. The pairs are first passed through
  `_filter_grads`; when a distribution strategy is active the gradients are
  replaced by the result of `merge_grads`. Hyperparameters and slots are
  created (slots inside `ops.init_scope()`), then one update op is built per
  pair and everything is combined with `self.iterations` by
  `merge_update_step`.

  Args:
    grads_and_vars: List of (gradient, variable) pairs as returned by
      `compute_gradients()`.
    name: Optional name for the name scope the update ops are created in.
      Defaults to the name passed to the `Optimizer` constructor.

  Returns:
    The result of `merge_update_step` for the per-variable update ops.

  Raises:
    NotImplementedError: If a "variable" is actually a `Tensor`.
    RuntimeError: If a sparse (`IndexedSlices`) gradient targets a variable
      that has a constraint function.
  """
  grads_and_vars = _filter_grads(grads_and_vars)
  var_list = [pair[1] for pair in grads_and_vars]
  if distribute_ctx.has_strategy():
    merged = merge_grads(grads_and_vars)
    grads_and_vars = zip(merged, var_list)

  self._create_hypers()
  with ops.init_scope():
    self._create_slots(var_list)

  self._prepare(var_list)

  def _single_update(grad, var):
    """Builds the update op for one (gradient, variable) pair."""
    if isinstance(var, ops.Tensor):
      raise NotImplementedError("Trying to update a Tensor ", var)

    if isinstance(grad, ops.IndexedSlices):
      if var.constraint is not None:
        raise RuntimeError(
            "Cannot use a constraint function on a sparse variable.")
      return self._resource_apply_sparse_duplicate_indices(
          grad.values, var, grad.indices)

    dense_update = self._resource_apply_dense(grad, var)
    if var.constraint is None:
      return dense_update
    # Project the variable only after the dense update has run.
    with ops.control_dependencies([dense_update]):
      return var.assign(var.constraint(var))

  update_ops = []
  with ops.name_scope(name, self._name) as name:
    for grad, var in grads_and_vars:
      if ops.executing_eagerly_outside_functions():
        suffix = ""
      else:
        suffix = "_" + var.op.name
      with ops.name_scope("update" + suffix):
        update_ops.append(_single_update(grad, var))
    # Control dependencies do not work in per-replica mode, so the counter is
    # advanced through merge_update_step instead of a control-dependent
    # assign_add; revisit once b/118841692 is fixed.
    return merge_update_step(update_ops, self.iterations)
def failing_function():
  """Creates a constant, then adds it to itself inside `ops.init_scope()`.

  NOTE(review): judging by the name, the addition is presumably expected to
  fail because the constant was created outside the init scope -- confirm
  against the caller.
  """
  outer_constant = constant_op.constant(1.)
  with ops.init_scope():
    _ = outer_constant + outer_constant
def get_global_generator():
  """Returns the module-level generator, creating it on first use.

  The generator is built with `Generator.from_non_deterministic_state()`
  inside `ops.init_scope()` and stored in the `global_generator` global.
  """
  global global_generator
  if global_generator is not None:
    return global_generator
  with ops.init_scope():
    global_generator = Generator.from_non_deterministic_state()
  return global_generator
def create_variable():
  """Appends one `Variable(initial_value_fn)` to `a` if `a` is still empty.

  Both the emptiness check and the variable creation run inside
  `ops.init_scope()`.
  """
  with ops.init_scope():
    if a:
      return
    a.append(variables.Variable(initial_value_fn))
def _build_graph_network_for_inferred_shape(self,
                                            input_shape,
                                            input_dtype=None):
  """(Re)builds the model as a graph network for a newly seen input shape.

  Does nothing when there is no input shape or no layers, when TF2 behavior is
  off or we are not executing eagerly outside functions, when the model has an
  explicit input shape, or when the legacy deferred behavior is active. It
  also does nothing when the (possibly relaxed) shape is not novel.

  Otherwise a symbolic `Input` is created inside `ops.init_scope()` and pushed
  through every layer. If any layer call fails, the model switches to the
  legacy deferred behavior and returns immediately.

  Args:
    input_shape: Shape of the incoming data (converted with `tuple()`), or
      None.
    input_dtype: Optional dtype forwarded to `input_layer.Input`.

  Raises:
    ValueError: If a layer returns anything other than a single output.
  """
  if input_shape is None or not self.layers:
    return
  if not (tf2.enabled() and ops.executing_eagerly_outside_functions()):
    # This behavior is disabled in V1 or when eager execution is disabled.
    return
  if self._has_explicit_input_shape or self._use_legacy_deferred_behavior:
    return

  # Determine whether the input shape is novel, i.e. whether the model
  # should be rebuilt.
  input_shape = tuple(input_shape)
  if self._inferred_input_shape is None:
    new_shape = input_shape
  else:
    new_shape = relax_input_shape(self._inferred_input_shape, input_shape)
  shape_is_novel = (new_shape is not None and
                    new_shape != self._inferred_input_shape)
  if not shape_is_novel:
    return

  # A novel shape has been received: we need to rebuild the model.
  # In case we are inside a graph function, we step out of it.
  with ops.init_scope():
    inputs = input_layer.Input(
        batch_shape=new_shape,
        dtype=input_dtype,
        name=self.layers[0].name + '_input')
    current = inputs
    fresh_nodes = set()
    for layer in self.layers:
      # Drop nodes a previous run of this method created, so they do not
      # accumulate and `layer.output` stays connected to `model.inputs`
      # (this matters e.g. for feature extraction). Resetting
      # `layer._inbound_nodes` wholesale would break shared layers added to
      # Sequential models (technically illegal per the `add()` docstring,
      # but not previously disabled).
      clear_previously_created_nodes(layer, self._created_nodes)
      try:
        # Create Functional API connection by calling the current layer
        layer_output = layer(current)
      except:  # pylint:disable=bare-except
        # A functional call can fail because the layer is buggy (easier to
        # debug on the first call with concrete data), because it is dynamic
        # and has not overridden `compute_output_shape`, or because it is
        # otherwise incompatible with the Functional API (e.g. it does not
        # return tensors). In all these cases no graph network is created.
        self._use_legacy_deferred_behavior = True
        return
      if len(nest.flatten(layer_output)) != 1:
        raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
      # Keep track of nodes just created above
      track_nodes_created_by_last_call(layer, fresh_nodes)
      current = layer_output
      outputs = layer_output
    self._created_nodes = fresh_nodes
    try:
      # Initializing a graph Network never fails for a stack of valid Keras
      # layers, but some user layers are fundamentally incompatible with the
      # Functional API and do not return tensors; fall back to the legacy
      # deferred behavior for those.
      # TODO(fchollet): consider raising here, as we should not be
      # supporting such layers.
      self._init_graph_network(inputs, outputs)
      self._graph_initialized = True
    except:  # pylint:disable=bare-except
      self._use_legacy_deferred_behavior = True
  self._inferred_input_shape = new_shape
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
  """Applies gradients through the wrapped optimizer with an added penalty.

  Each variable gets two slots: "f", an accumulator of squared gradients, and
  "v1", a stored copy of the variable. The penalty is
  `lam / 2 * sum(f * (var - v1)^2)`; it is added to the loss(es), the
  gradients are recomputed, and those are handed to `self.optimizer`.
  The gradients passed in `grads_and_vars` are not used; only the variables
  are.

  Args:
    grads_and_vars: Iterable of (gradient, variable) pairs. Every variable
      must be in `self.gan.d_vars()` or `self.gan.g_vars()`.
    global_step: Optional step variable forwarded to the wrapped optimizer.
    name: Optional op name forwarded to the wrapped optimizer.

  Returns:
    A no-op that depends on storing the accumulators, the optimizer step, and
    saving the current weights.

  Raises:
    ValueError: If a variable is in neither `d_vars` nor `g_vars`.
  """
  # Materialize once so one-shot iterables (e.g. `zip`) are handled.
  grads_and_vars = list(grads_and_vars)
  var_list = [v for _, v in grads_and_vars]

  known_d_vars = self.gan.d_vars()
  known_g_vars = self.gan.g_vars()
  d_vars = []
  g_vars = []
  for var in var_list:
    if var in known_d_vars:
      d_vars.append(var)
    elif var in known_g_vars:
      g_vars.append(var)
    else:
      # Raising a bare string is itself a TypeError in Python 3.
      raise ValueError("Couldn't find var in g_vars or d_vars")

  # Create all slots up front inside init_scope; they are fetched again with
  # `get_slot` below.
  with ops.init_scope():
    for v in var_list:
      self._zeros_slot(v, "f", self._name)
    for v in var_list:
      self._get_or_make_slot(v, v, "v1", self._name)
    if self.config.include_slots:
      # `_` instead of `name`: the loop must not overwrite the `name`
      # argument that is forwarded to the wrapped optimizer below.
      for _ in self.optimizer.get_slot_names():
        for slot_var in self.optimizer.variables():
          self._zeros_slot(slot_var, "pm", "pm")

  self._prepare()

  f1 = [self.get_slot(v, "f") for v in var_list]
  v1 = [self.get_slot(v, "v1") for v in var_list]
  slots_list = []
  slots_vars = []
  if self.config.include_slots:
    # NOTE(review): every optimizer variable is appended once per slot name,
    # so entries repeat when there are several slot names -- confirm intent.
    for _ in self.optimizer.get_slot_names():
      for slot_var in self.optimizer.variables():
        slots_vars.append(slot_var)
        slots_list.append(self._zeros_slot(slot_var, "pm", "pm"))

  current_vars = var_list + slots_vars
  tmp_vars = v1 + slots_list

  diff = [tf.square(v - t) for v, t in zip(current_vars, tmp_vars)]

  # NOTE(review): with a list loss, gradients are ordered d_vars then g_vars,
  # yet they are zipped against `f1` / `current_vars` in `var_list` order --
  # this presumably relies on the caller passing d vars first; verify.
  if isinstance(self.loss, list):
    grads = (tf.gradients(self.loss[0], d_vars) +
             tf.gradients(self.loss[1], g_vars))
  else:
    grads = tf.gradients(self.loss, current_vars)

  f_decay = self.config.f_decay or 0.95
  f_accum = [f_decay * f + tf.square(g) for f, g in zip(f1, grads)]
  self.gan.add_metric('f1', tf.reduce_sum([tf.reduce_sum(f) for f in f1]))

  reg = [tf.multiply(f, d) for f, d in zip(f1, diff)]
  ewc_loss = (self.config.lam or 17.5) / 2.0 * tf.reduce_sum(
      [tf.reduce_sum(r) for r in reg])
  self.gan.add_metric('ewc', ewc_loss)

  # Store the current variable values into the "v1" / "pm" slots.
  save_weights = tf.group(
      *[tf.assign(w, v) for w, v in zip(tmp_vars, current_vars)])

  if isinstance(self.loss, list):
    new_grads = (tf.gradients(self.loss[0] + ewc_loss, d_vars) +
                 tf.gradients(self.loss[1] + ewc_loss, g_vars))
  else:
    new_grads = tf.gradients(self.loss + ewc_loss, current_vars)

  step = self.optimizer.apply_gradients(
      list(zip(new_grads, current_vars)), global_step=global_step, name=name)

  store_f = tf.group(*[tf.assign(w, v) for w, v in zip(f1, f_accum)])
  # The returned no-op only runs after all three groups have executed.
  with tf.get_default_graph().control_dependencies([store_f]):
    with tf.get_default_graph().control_dependencies([step]):
      with tf.get_default_graph().control_dependencies([save_weights]):
        return tf.no_op()
def __call__(self, *args, **kwds):
    """Calls the graph function.

    Dispatches on what earlier calls recorded:

    * If variables were created on a previous call, run `_stateless_fn`.
    * Else if `_stateful_fn` already exists, run it and fail if that call
      creates variables.
    * Otherwise this is the first call: run `_initialize`, then either run
      lifted initializers and call `_stateless_fn`, reuse the concrete
      stateful trace when it created no variables, or build a function that
      chooses between the two at run time based on whether every created
      variable is initialized.

    Args:
      *args: Positional arguments forwarded to the traced function.
      **kwds: Keyword arguments forwarded to the traced function.

    Returns:
      Whatever the selected traced function returns.

    Raises:
      ValueError: If variables are created on a non-first call, or if a
        variable created inside the function has been garbage-collected.
    """
    if self._created_variables:
        # In this case we have created variables on the first call, so we run the
        # defunned version which is guaranteed to never create variables.
        return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
    elif self._stateful_fn is not None:
        # In this case we have not created variables on the first call. So we can
        # run the first trace but we should fail if variables are created.
        results = self._stateful_fn(*args, **kwds)
        if self._created_variables:
            raise ValueError(
                "Creating variables on a non-first call to a function"
                " decorated with tf.function.")
        return results

    # This is the first call of __call__, so we have to initialize.
    self._initialize(args, kwds)

    if self._lifted_all_initializers and self._lifted_placeholders:
        with ops.init_scope():
            # `_lifted_placeholders` is unzipped into parallel tuples of
            # handles and placeholders.
            handles, placeholders = zip(*self._lifted_placeholders)
            if context.executing_eagerly():
                lifted_fn = function_lib._EagerDefinedFunction(  # pylint: disable=protected-access
                    "initializer" + str(ops.uid()),
                    self._lifted_initializer_graph, placeholders, [], {})
                # Run the lifted initializer graph without tape recording.
                with tape.stop_recording():
                    lifted_fn.call(context.context(), list(handles))
        return self._stateless_fn(*args, **kwds)
    canon_args, canon_kwds = self._canonicalize_function_inputs(args, kwds)

    if not self._created_variables:
        # If we did not create any variables the trace we have is good enough.
        return self._concrete_stateful_fn._filtered_call(
            canon_args, canon_kwds)  # pylint: disable=protected-access

    def fn_with_cond(*inner_args, **inner_kwds):
        """Conditionally runs initialization if it's needed."""
        condition = True
        # `_created_variables` holds callables (`wr`) that return the variable
        # or None once it has been collected.
        for wr in self._created_variables:
            variable = wr()
            if variable is None:
                raise ValueError(
                    "A tf.Variable created inside your tf.function has been"
                    " garbage-collected. Your code needs to keep Python references"
                    " to variables created inside `tf.function`s.\n"
                    "\n"
                    "A common way to raise this error is to create and return a"
                    " variable only referenced inside your function:\n"
                    "\n"
                    "@tf.function\n"
                    "def f():\n"
                    " v = tf.Variable(1.0)\n"
                    " return v\n"
                    "\n"
                    "v = f() # Crashes with this error message!\n"
                    "\n"
                    "The reason this crashes is that @tf.function annotated"
                    " function returns a **`tf.Tensor`** with the **value** of the"
                    " variable when the function is called rather than the"
                    " variable instance itself. As such there is no code holding a"
                    " reference to the `v` created inside the function and Python"
                    " garbage collects it.\n"
                    "\n"
                    "The simplest way to fix this issue is to create variables"
                    " outside the function and capture them:\n"
                    "\n"
                    "v = tf.Variable(1.0)\n"
                    "\n"
                    "@tf.function\n"
                    "def f():\n"
                    " return v\n"
                    "\n"
                    "f() # <tf.Tensor: ... numpy=1.>\n"
                    "v.assign_add(1.)\n"
                    "f() # <tf.Tensor: ... numpy=2.>")
            # AND together "is initialized" over every created variable.
            condition = math_ops.logical_and(
                condition,
                resource_variable_ops.var_is_initialized_op(
                    variable.handle))
        # We want to call stateless_fn if possible because it avoids recomputing
        # potentially expensive initializers.
        return control_flow_ops.cond(
            condition,
            lambda: self._stateless_fn(*inner_args, **inner_kwds),
            functools.partial(
                self._concrete_stateful_fn._filtered_call,  # pylint: disable=protected-access
                inner_args, inner_kwds))

    return function_lib.defun(fn_with_cond)(*canon_args, **canon_kwds)
def _init_from_args(
    self,
    initial_value=None,
    trainable=None,
    collections=None,
    caching_device=None,
    name=None,
    dtype=None,
    constraint=None,
    synchronization=None,
    aggregation=None,
    distribute_strategy=None,
    shape=None,
):
    """Creates a variable.

    Builds the resource handle, initializer, read and (optional) cached-read
    ops, then calls the `__init__` that follows
    `resource_variable_ops.ResourceVariable` in the MRO. Every read built
    here is preceded by an assignment of `self.prefetch_values()` to the
    handle.

    Args:
      initial_value: A `Tensor`, or Python object convertible to a `Tensor`,
        which is the initial value for the Variable. Can also be a callable
        with no argument that returns the initial value when called. (Note
        that initializer functions from init_ops.py must first be bound to a
        shape before being used here.)
      trainable: If `True`, the default, also adds the variable to the graph
        collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used
        as the default list of variables to use by the `Optimizer` classes.
        Defaults to `True`, unless `synchronization` is set to `ON_READ`, in
        which case it defaults to `False`.
      collections: List of graph collections keys. The new variable is added
        to these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
      caching_device: Optional device string or function describing where
        the Variable should be cached for reading. Defaults to the
        Variable's device. If not `None`, caches on another device. Typical
        use is to cache on the device where the Ops using the Variable
        reside, to deduplicate copying through `Switch` and other
        conditional statements.
      name: Optional name for the variable. Defaults to `'TrainableWrapper'`
        and gets uniquified automatically.
      dtype: If set, initial_value will be converted to the given type. If
        None, either the datatype will be kept (if initial_value is a
        Tensor) or float32 will be used (if it is a Python object
        convertible to a Tensor).
      constraint: An optional projection function to be applied to the
        variable after being updated by an `Optimizer` (e.g. used to
        implement norm constraints or value constraints for layer weights).
        The function must take as input the unprojected Tensor representing
        the value of the variable and return the Tensor for the projected
        value (which must have the same shape). Constraints are not safe to
        use when doing asynchronous distributed training.
      synchronization: Indicates when a distributed variable will be
        aggregated. Accepted values are constants defined in the class
        `tf.VariableSynchronization`. By default the synchronization is set
        to `AUTO` and the current `DistributionStrategy` chooses when to
        synchronize.
      aggregation: Indicates how a distributed variable will be aggregated.
        Accepted values are constants defined in the class
        `tf.VariableAggregation`.
      distribute_strategy: DistributionStrategy under which this variable
        was created.
      shape: (optional) The shape of this variable. If None, the shape of
        `initial_value` will be used. Note that the resource handle itself
        is always created with `shape=None`; this value is only forwarded to
        the base-class `__init__`.

    Raises:
      ValueError: If the initial value is not specified, is a Tensor created
        while building a function, or comes from inside a control-flow
        construct in graph mode; if `collections` is not a list, tuple or
        set; or if `constraint` is not callable.

    @compatibility(eager)
    When Eager Execution is enabled, variables are never added to
    collections. It is not implicitly added to the `GLOBAL_VARIABLES` or
    `TRAINABLE_VARIABLES` collections, and the `collections` argument is
    ignored.
    @end_compatibility
    """
    (
        synchronization,
        aggregation,
        trainable,
    ) = variables.validate_synchronization_aggregation_trainable(
        synchronization, aggregation, trainable, name)
    if initial_value is None:
        raise ValueError("initial_value must be specified.")
    init_from_fn = callable(initial_value)

    if (isinstance(initial_value, ops.Tensor)
            and hasattr(initial_value, "graph")
            and initial_value.graph.building_function):
        raise ValueError(
            "Tensor-typed variable initializers must either be "
            "wrapped in an init_scope or callable "
            "(e.g., `tf.Variable(lambda : "
            "tf.truncated_normal([10, 40]))`) when building "
            "functions. Please file a feature request if this "
            "restriction inconveniences you.")

    if collections is None:
        collections = [ops.GraphKeys.GLOBAL_VARIABLES]
    if not isinstance(collections, (list, tuple, set)):
        raise ValueError(
            "collections argument to Variable constructor must be a list, tuple, "
            "or set. Got %s of type %s" % (collections, type(collections)))
    if constraint is not None and not callable(constraint):
        raise ValueError("The `constraint` argument must be a callable.")

    # Unwrap a checkpoint-provided initial value and remember its restore uid.
    if isinstance(initial_value, trackable.CheckpointInitialValue):
        self._maybe_initialize_trackable()
        self._update_uid = initial_value.checkpoint_position.restore_uid
        initial_value = initial_value.wrapped_value

    if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections:
        collections = list(collections) + [
            ops.GraphKeys.TRAINABLE_VARIABLES
        ]
    with ops.init_scope():
        self._in_graph_mode = not context.executing_eagerly()
        with ops.name_scope(
                name, "TrainableWrapper",
                [] if init_from_fn else [initial_value]) as name:
            # pylint: disable=protected-access
            handle_name = ops.name_from_scope_name(name)
            handle_name = handle_name or "TrainableWrapperHandle"
            if self._in_graph_mode:
                shared_name = handle_name
                unique_id = shared_name
            else:
                # When in eager mode use a uid for the shared_name, to prevent
                # accidental sharing.
                unique_id = "%s_%d" % (handle_name, ops.uid())
                shared_name = None  # Never shared
            # Use attr_scope and device(None) to simulate the behavior of
            # colocate_with when the variable we want to colocate with doesn't
            # yet exist.
            device_context_manager = (ops.device if self._in_graph_mode else
                                      ops.NullContextmanager)
            attr = attr_value_pb2.AttrValue(
                list=attr_value_pb2.AttrValue.ListValue(
                    s=[compat.as_bytes("loc:@%s" % handle_name)]))
            with ops.get_default_graph()._attr_scope({"_class": attr}):
                with ops.name_scope("Initializer"), device_context_manager(
                        None):
                    # A callable initial value is invoked here, exactly once.
                    initial_value = ops.convert_to_tensor(
                        initial_value() if init_from_fn else initial_value,
                        name="initial_value",
                        dtype=dtype,
                    )
                if shape is None:
                    shape = initial_value.shape
                # NOTE(review): the handle is deliberately created with
                # shape=None rather than `shape` (see the trailing comment);
                # presumably to allow assigning differently shaped values.
                handle = resource_variable_ops.eager_safe_variable_handle(
                    initial_value=initial_value,
                    shape=None,  # shape,
                    shared_name=shared_name,
                    name=name,
                    graph_mode=self._in_graph_mode,
                )
            # pylint: disable=protected-access
            if (self._in_graph_mode and initial_value is not None and
                    initial_value.op._get_control_flow_context() is not None):
                raise ValueError(
                    "Initializer for variable %s is from inside a control-flow "
                    "construct, such as a loop or conditional. When creating a "
                    "variable inside a loop or conditional, use a lambda as the "
                    "initializer." % name)
            # pylint: enable=protected-access
            dtype = initial_value.dtype.base_dtype

            if self._in_graph_mode:
                with ops.name_scope("IsInitialized"):
                    is_initialized_op = (gen_resource_variable_ops.
                                         var_is_initialized_op(handle))
                if initial_value is not None:
                    # pylint: disable=g-backslash-continuation
                    with ops.name_scope("Assign") as n, ops.colocate_with(
                            None, ignore_existing=True), ops.device(
                                handle.device):
                        # pylint: disable=protected-access
                        initializer_op = gen_resource_variable_ops.assign_variable_op(
                            handle,
                            variables.
                            _try_guard_against_uninitialized_dependencies(
                                name, initial_value),
                            name=n,
                        )
                        # pylint: enable=protected-access
                    # pylint: enable=g-backslash-continuation
                with ops.name_scope("Read"):
                    # Manually assign reads to the handle's device to avoid log
                    # messages.
                    with ops.device(handle.device):
                        # The read is made to depend on an assignment of
                        # `self.prefetch_values()` to the handle.
                        with ops.control_dependencies([
                                gen_resource_variable_ops.
                                assign_variable_op(
                                    handle,
                                    self.prefetch_values(),
                                    name="AssignBeforeInitRead",
                                )
                        ]):
                            value = gen_resource_variable_ops.read_variable_op(
                                handle, dtype)
                    graph_element = value
                    if caching_device is not None:
                        # Variables may be created in a tf.device() or ops.colocate_with()
                        # context. At the same time, users would expect caching device to
                        # be independent of this context, and/or would not expect the
                        # current device context to be merged with the caching device
                        # spec. Therefore we reset the colocation stack before creating
                        # the cached value. Note that resetting the colocation stack will
                        # also reset the device stack.
                        with ops.colocate_with(None, ignore_existing=True):
                            with ops.device(caching_device):
                                cached_value = array_ops.identity(value)
                    else:
                        cached_value = None
            else:
                # Eager mode: assign the initial value immediately; there are
                # no initializer / is-initialized ops or graph element.
                gen_resource_variable_ops.assign_variable_op(
                    handle, initial_value)
                is_initialized_op = None
                initializer_op = None
                graph_element = None
                if caching_device:
                    with ops.device(caching_device):
                        with ops.control_dependencies([
                                gen_resource_variable_ops.
                                assign_variable_op(
                                    handle,
                                    self.prefetch_values(),
                                    name="AssignBeforeInitRead",
                                )
                        ]):
                            cached_value = (
                                gen_resource_variable_ops.read_variable_op(
                                    handle, dtype))
                else:
                    cached_value = None
            if not context.executing_eagerly():
                # Eager variables are only added to collections if they are part of an
                # eager variable store (otherwise in an interactive session they would
                # hog memory and cause OOM). This is done in ops/variable_scope.py.
                ops.add_to_collections(collections, self)
            elif ops.GraphKeys.GLOBAL_STEP in collections:
                ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self)
        # The initial value tensor is only kept in graph mode.
        initial_value = initial_value if self._in_graph_mode else None
        # Starts the MRO lookup *after* ResourceVariable, so ResourceVariable's
        # own __init__ is skipped.
        super(resource_variable_ops.ResourceVariable, self).__init__(
            trainable=trainable,
            shape=shape,
            dtype=dtype,
            handle=handle,
            synchronization=synchronization,
            constraint=constraint,
            aggregation=aggregation,
            distribute_strategy=distribute_strategy,
            name=name,
            unique_id=unique_id,
            handle_name=handle_name,
            graph_element=graph_element,
            initial_value=initial_value,
            initializer_op=initializer_op,
            is_initialized_op=is_initialized_op,
            cached_value=cached_value,
        )
def variable(self, name, value, dtype):
    """Return the variable stored under `name`, creating it on first request.

    On a cache miss a new `variables.Variable(value, dtype=dtype)` is built
    inside `ops.init_scope()`, stored in `self.variables` and its initializer
    is passed to `self.evaluate`. Later calls with the same `name` return the
    stored variable and ignore `value` / `dtype`.

    Args:
      name: Key under which the variable is cached in `self.variables`.
      value: Initial value used only when the variable is first created.
      dtype: Dtype used only when the variable is first created.

    Returns:
      The entry of `self.variables` for `name`.
    """
    with ops.init_scope():
        already_created = name in self.variables
        if not already_created:
            fresh = variables.Variable(value, dtype=dtype)
            self.variables[name] = fresh
            self.evaluate(self.variables[name].initializer)
    return self.variables[name]
def _zero_debias(strategy, unbiased_var, value, decay):
    """Compute the delta required for a debiased Variable.

    Exponential moving averages that start from 0 are biased towards 0. For
    an EMA initialized to `0` with decay `b`, after `t` steps of observing
    the constant `c` the value is

    ```
      EMA = 0*b^(t) + c*(1 - b)*b^(t-1) + c*(1 - b)*b^(t-2) + ...
          = c*(1 - b^t)
    ```

    so dividing by the scale factor `1 - b^t` recovers `c` (Kingma et al.,
    2015). Two shadow variables are used: `biased` holds the biased estimate
    and `local_step` counts the updates applied so far.

    Args:
      strategy: `Strategy` used to create and update variables.
      unbiased_var: A Variable representing the current value of the
        unbiased EMA.
      value: A Tensor representing the most recent value.
      decay: A Tensor representing `1-decay` for the EMA.

    Returns:
      The result of `strategy.extended.update` applied to `unbiased_var`;
      computing it also updates the two shadow variables.

    References:
      Adam - A Method for Stochastic Optimization:
        [Kingma et al., 2015](https://arxiv.org/abs/1412.6980)
        ([pdf](https://arxiv.org/pdf/1412.6980.pdf))
    """
    scope_name = unbiased_var.name[:-len(":0")]
    with variable_scope.variable_scope(scope_name,
                                       values=[unbiased_var, value, decay]):
        with ops.init_scope():
            biased_initializer = init_ops.zeros_initializer()
            local_step_initializer = init_ops.zeros_initializer()

        def _maybe_get_unique(name):
            """Get name for a unique variable, if not `reuse=True`."""
            scope = variable_scope.get_variable_scope()
            if scope.reuse:
                return name
            taken = [x.op.name for x in scope.global_variables()]
            qualified = scope.name + "/" + name
            if qualified not in taken:
                return name
            # Probe "_1", "_2", ... until an unused suffix is found.
            idx = 1
            suffix = "_%d" % idx
            while qualified + suffix in taken:
                idx += 1
                suffix = "_%d" % idx
            return name + suffix

        with strategy.extended.colocate_vars_with(unbiased_var):
            biased_var = variable_scope.get_variable(
                _maybe_get_unique("biased"),
                initializer=biased_initializer,
                shape=unbiased_var.get_shape(),
                dtype=unbiased_var.dtype,
                trainable=False)
            local_step = variable_scope.get_variable(
                _maybe_get_unique("local_step"),
                shape=[],
                dtype=unbiased_var.dtype,
                initializer=local_step_initializer,
                trainable=False)

    def update_fn(v, value, biased_var, local_step):
        """Update both shadow variables and assign the debiased value to `v`."""
        biased_delta = (biased_var - value) * decay
        new_biased = state_ops.assign_sub(biased_var, biased_delta)
        step_count = local_step.assign_add(1)

        # This function gets `1 - decay`, so use `1.0 - decay` in the exponent.
        correction = 1 - math_ops.pow(1.0 - decay, step_count)
        debiased = new_biased / correction
        return state_ops.assign(v, debiased, name=ops.get_name_scope() + "/")

    return strategy.extended.update(unbiased_var,
                                    update_fn,
                                    args=(value, biased_var, local_step))
def add_weight(self,
               name,
               shape,
               dtype=None,
               initializer=None,
               regularizer=None,
               trainable=None,
               constraint=None,
               use_resource=None,
               synchronization=vs.VariableSynchronization.AUTO,
               aggregation=vs.VariableAggregation.NONE,
               partitioner=None,
               **kwargs):
    """Adds a new variable to the layer, or gets an existing one; returns it.

    Arguments:
      name: variable name.
      shape: variable shape.
      dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
      initializer: initializer instance (callable).
      regularizer: regularizer instance (callable).
      trainable: whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
        Note, if the current variable scope is marked as non-trainable
        then this parameter is ignored and any added variables are also
        marked as non-trainable. `trainable` defaults to `True` unless
        `synchronization` is set to `ON_READ`.
      constraint: constraint instance (callable).
      use_resource: Whether to use `ResourceVariable`.
      synchronization: Indicates when a distributed variable will be
        aggregated. Accepted values are constants defined in the class
        `tf.VariableSynchronization`. By default the synchronization is set to
        `AUTO` and the current `DistributionStrategy` chooses
        when to synchronize. If `synchronization` is set to `ON_READ`,
        `trainable` must not be set to `True`.
      aggregation: Indicates how a distributed variable will be aggregated.
        Accepted values are constants defined in the class
        `tf.VariableAggregation`.
      partitioner: (optional) partitioner instance (callable).  If
        provided, when the requested variable is created it will be split
        into multiple partitions according to `partitioner`.  In this case,
        an instance of `PartitionedVariable` is returned.  Available
        partitioners include `tf.compat.v1.fixed_size_partitioner` and
        `tf.compat.v1.variable_axis_size_partitioner`.  For more details, see
        the documentation of `tf.compat.v1.get_variable` and the  "Variable
        Partitioners and Sharding" section of the API guide.
      **kwargs: Additional keyword arguments. Only `experimental_autocast`
        is accepted.

    Returns:
      The created variable.  Usually either a `Variable` or `ResourceVariable`
      instance.  If `partitioner` is not `None`, a `PartitionedVariable`
      instance is returned.

    Raises:
      RuntimeError: If called with partitioned variable regularization and
        eager execution is enabled.
      TypeError: If an unknown keyword argument is passed.
      ValueError: When trainable has been set to True with synchronization
        set as `ON_READ`.
    """
    for kwarg in kwargs:
        if kwarg != 'experimental_autocast':
            raise TypeError('Unknown keyword argument:', kwarg)
    if self._keras_style:
        # NOTE(review): this branch passes the AUTO / NONE constants rather
        # than the `synchronization` / `aggregation` arguments - confirm
        # that is intended.
        return super(Layer, self).add_weight(
            name=name,
            shape=shape,
            dtype=dtype,
            initializer=initializer,
            regularizer=regularizer,
            trainable=trainable and self.trainable,
            constraint=constraint,
            use_resource=use_resource,
            synchronization=vs.VariableSynchronization.AUTO,
            aggregation=vs.VariableAggregation.NONE,
            partitioner=partitioner,
            **kwargs)

    if synchronization == vs.VariableSynchronization.ON_READ:
        if trainable:
            raise ValueError(
                'Synchronization value can be set to '
                'VariableSynchronization.ON_READ only for non-trainable variables. '
                'You have specified trainable=True and '
                'synchronization=VariableSynchronization.ON_READ.')
        else:
            # Set trainable to be false when variable is to be synced on read.
            trainable = False
    elif trainable is None:
        trainable = True

    def _should_add_regularizer(variable, existing_variable_set):
        # True only when the variable (or, for a split variable, every one of
        # its parts) is absent from `existing_variable_set`.
        if base_layer_utils.is_split_variable(variable):
            for var in variable:
                if var in existing_variable_set:
                    return False
            return True
        else:
            return variable not in existing_variable_set

    init_graph = None
    if not context.executing_eagerly():
        default_graph = ops.get_default_graph()
        if default_graph.building_function:
            with ops.init_scope():
                # Retrieve the variables from the graph into which variables
                # will be lifted; if initialization ops will be lifted into
                # the eager context, then there is nothing to retrieve, since variable
                # collections are not supported when eager execution is enabled.
                if not context.executing_eagerly():
                    init_graph = ops.get_default_graph()
                    existing_variables = set(
                        tf_variables.global_variables())
        else:
            # Initialization ops will not be lifted out of the default graph.
            init_graph = default_graph
            existing_variables = set(tf_variables.global_variables())

    if dtype is None:
        dtype = self.dtype or dtypes.float32

    self._set_scope(None)
    reuse = self.built or self._reuse
    # Remembered so trainable weights added by this call can be identified
    # below.
    prev_len_trainable = len(self._trainable_weights)
    with vs.variable_scope(self._scope,
                           reuse=reuse,
                           auxiliary_name_scope=False) as scope:
        self._current_scope = scope
        with backend.name_scope(self._name_scope()):
            use_resource = (use_resource or self._use_resource_variables
                            or scope.use_resource)
            if initializer is None:
                initializer = scope.initializer
            variable = super(Layer, self).add_weight(
                name,
                shape,
                dtype=dtypes.as_dtype(dtype),
                initializer=initializer,
                trainable=trainable and self.trainable,
                constraint=constraint,
                partitioner=partitioner,
                use_resource=use_resource,
                synchronization=synchronization,
                aggregation=aggregation,
                getter=vs.get_variable,
                **kwargs)

            if regularizer:
                # `existing_variables` is only bound when `init_graph` was
                # set above; the first operand short-circuits otherwise.
                if (ops.executing_eagerly_outside_functions()
                        or _should_add_regularizer(variable,
                                                   existing_variables)):
                    self._handle_weight_regularization(
                        name, variable, regularizer)

            if init_graph is not None:
                # Handle edge case where a custom getter has overridden `trainable`.
                # There is one known occurrence of this, in unit test
                # testBasicRNNCellNotTrainable in
                # contrib.rnn.python.kernel_tests.core_rnn_cell_test
                with init_graph.as_default():
                    trainable_variables = tf_variables.trainable_variables(
                    )
                if (trainable and self.trainable
                        and variable not in trainable_variables):
                    # A custom getter / variable scope overrode the trainable flag.
                    extra_trainable_vars = self._trainable_weights[
                        prev_len_trainable:]
                    self._trainable_weights = self._trainable_weights[:
                                                                      prev_len_trainable]
                    self._non_trainable_weights += extra_trainable_vars
    return variable
def _real_mirrored_creator(devices, *args, **kwargs):
    """Creates one MirroredVariable on the current worker.

    Creates one variable per entry of `devices` via `next_creator`. The first
    variable's initial value is broadcast from the chief worker when there
    is more than one worker; variables on the remaining devices copy the
    first one. `self` and `next_creator` are free variables, so this
    function is expected to be defined inside an enclosing scope that
    provides them.

    Args:
      devices: Sequence of device strings, one variable is created per entry.
      *args: Forwarded to `next_creator`.
      **kwargs: Forwarded to `next_creator`; must contain "name" and
        "initial_value". "name" and "initial_value" are overwritten in
        place for each device.

    Returns:
      List of the created variables, in `devices` order.

    Raises:
      ValueError: If "initial_value" is missing from `kwargs`.
    """
    unique_var_name = ops.get_default_graph().unique_name(
        kwargs["name"], mark_as_used=False).rstrip("/")
    # pylint: disable=protected-access
    collective_instance_key = self._collective_keys.get_instance_key(
        key_id=unique_var_name)
    # Only the first device participates in the broadcast of initial values.
    group_key = self._collective_keys.get_group_key([devices[0]])
    group_size = self._num_workers
    if "initial_value" not in kwargs:
        raise ValueError("Initial value must be specified.")
    initial_value = kwargs["initial_value"]
    # Normalise to a zero-argument callable.
    if callable(initial_value):
        initial_value_fn = initial_value
    else:
        initial_value_fn = lambda: initial_value

    value_list = []
    for i, d in enumerate(devices):
        with ops.init_scope(), ops.device(d):
            if i == 0:
                # The initial value fn makes sure variables all initialized to
                # same values. The first device of the chief worker will send their
                # variable values to other workers.
                def _overridden_initial_value_fn(device=d, index=i):  # pylint: disable=g-missing-docstring
                    with ops.device(device):
                        initial_value = initial_value_fn()
                        assert not callable(initial_value)
                        initial_value = ops.convert_to_tensor(initial_value)

                        assert index == 0, index
                        if self._num_workers > 1:
                            if self._is_chief:
                                bcast_send = collective_ops.broadcast_send(
                                    initial_value, initial_value.shape,
                                    initial_value.dtype, group_size,
                                    group_key, collective_instance_key)
                                # Returning the value depends on the send op.
                                with ops.control_dependencies([bcast_send]):
                                    return array_ops.identity(initial_value)
                            else:
                                return collective_ops.broadcast_recv(
                                    initial_value.shape, initial_value.dtype,
                                    group_size, group_key,
                                    collective_instance_key)
                        return initial_value
            else:
                # Give replicas meaningful distinct names:
                var0name = value_list[0].name.split(":")[0]
                # We append a / to variable names created on replicas with id > 0 to
                # ensure that we ignore the name scope and instead use the given
                # name as the absolute name of the variable.
                kwargs["name"] = "%s/replica_%d/" % (var0name, i)

                # Variables on non-first replica get initial values from the
                # variables created on the first device of each worker.
                def _overridden_initial_value_fn(device=d, index=i):
                    assert index > 0
                    with ops.device(device):
                        if context.executing_eagerly():
                            return array_ops.identity(value_list[0].value())
                        else:
                            return array_ops.identity(
                                value_list[0].initial_value)

            kwargs["initial_value"] = _overridden_initial_value_fn
            with context.device_policy(context.DEVICE_PLACEMENT_SILENT):
                # Don't record operations (e.g. other variable reads) during
                # variable creation.
                with tape.stop_recording():
                    v = next_creator(*args, **kwargs)

            if i == 0:
                # The name predicted above (and used as the collective
                # instance key) must match the name actually assigned.
                actual_var_name = v.name.split(":")[0]
                assert unique_var_name == actual_var_name, "%r vs %r" % (
                    unique_var_name, actual_var_name)
            assert not isinstance(v, values.DistributedVariable)
            value_list.append(v)
    return value_list
def load(self, tags):
    """Creates an object from the MetaGraph identified by `tags`.

    Args:
      tags: Passed to `self.get_meta_graph_def_from_tags` to select the
        MetaGraph to load.

    Returns:
      A `tracking.AutoTrackable` with the attributes `initializer`,
      `asset_paths`, `signatures`, `variables`, `tensorflow_version`,
      `tensorflow_git_version`, `graph` and `prune` set.
    """
    meta_graph_def = self.get_meta_graph_def_from_tags(tags)
    # Suffix made unique per load via ops.uid().
    load_shared_name_suffix = "_load_{}".format(ops.uid())
    functions = function_deserialization.load_function_def_library(
        meta_graph_def.graph_def.library,
        load_shared_name_suffix=load_shared_name_suffix)
    # Replace existing functions in the MetaGraphDef with renamed functions so
    # we don't have duplicates or name collisions.
    meta_graph_def.graph_def.library.Clear()
    for function in functions.values():
        meta_graph_def.graph_def.library.function.add().CopyFrom(
            function.function_def)
    # We've renamed functions and shared names. We need the same operation on
    # the GraphDef itself for consistency.
    for node_def in meta_graph_def.graph_def.node:
        function_deserialization.fix_node_def(
            node_def,
            functions,
            load_shared_name_suffix,
            debug_name="MetaGraph import")

    # One-element list used as an out-parameter: `self.load_graph` is expected
    # to store the saver in it (unpacked right below).
    load_graph_returns = [None]
    wrapped = wrap_function.wrap_function(functools.partial(
        self.load_graph, load_graph_returns, meta_graph_def),
                                          signature=[])
    saver, = load_graph_returns
    self.restore_variables(wrapped, saver)
    with wrapped.graph.as_default():
        # Fall back to the default local init op when the MetaGraph has none.
        init_op = loader_impl.get_init_op(
            meta_graph_def
        ) or monitored_session.Scaffold.default_local_init_op()
        # Add a dummy Tensor we know we can fetch to add control dependencies to.
        init_anchor = constant_op.constant(0., name="dummy_fetch")

    root = tracking.AutoTrackable()
    asset_feed_tensors = []
    asset_paths = []
    for tensor_name, value in loader_impl.get_asset_tensors(
            self._export_dir, meta_graph_def).items():
        asset_feed_tensors.append(
            wrapped.graph.as_graph_element(tensor_name))
        asset_paths.append(tracking.TrackableAsset(value))
    # Prune the wrapped graph down to a function that feeds the asset tensors
    # and fetches the init op.
    init_fn = wrapped.prune(
        feeds=asset_feed_tensors,
        fetches=[init_anchor,
                 wrapped.graph.as_graph_element(init_op)])
    initializer = _Initializer(init_fn, asset_paths)
    # pylint: disable=protected-access
    local_init_op, _ = initializer._initialize()
    # pylint: enable=protected-access
    with ops.init_scope():
        if not context.executing_eagerly():
            ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS,
                                  local_init_op)
            for variable in wrapped.graph.get_collection_ref(
                    ops.GraphKeys.LOCAL_VARIABLES):
                # pylint: disable=protected-access
                variable._initializer_op = local_init_op
                # pylint: enable=protected-access
    root.initializer = initializer
    root.asset_paths = asset_paths
    signature_functions = self._extract_signatures(wrapped, meta_graph_def)

    root.signatures = signature_serialization.create_signature_map(
        signature_functions)
    root.variables = list(wrapped.graph.variables)
    root.tensorflow_version = (
        meta_graph_def.meta_info_def.tensorflow_version)
    root.tensorflow_git_version = (
        meta_graph_def.meta_info_def.tensorflow_git_version)
    root.graph = wrapped.graph
    root.prune = wrapped.prune
    return root
def _create_keras_history_helper(tensors, processed_ops, created_layers):
    """Recursive worker behind `create_keras_history`.

    Walks backwards from `tensors` through the ops that produced them and
    wraps every op not seen before in a `TensorFlowOpLayer`.

    Arguments:
      tensors: A structure of Tensors for which to create Keras metadata.
      processed_ops: Set. TensorFlow operations that have already been
        wrapped in `TensorFlowOpLayer` instances.
      created_layers: List. The `TensorFlowOpLayer` instances created.

    Returns:
      Tuple `(processed_ops, created_layers)`: the updated set of wrapped
      operations and the list of `TensorFlowOpLayer` instances created.

    Raises:
      ValueError: If an op whose type starts with 'Sparse' is encountered.
    """
    # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
    # Cannot be imported at top because of circular dependencies.
    # TODO(omalleyt): Resolve circular dependency.
    from tensorflow.python.keras.engine import base_layer  # pylint: disable=g-import-not-at-top

    for tensor in nest.flatten(tensors):
        if getattr(tensor, '_keras_history', None) is not None:
            continue
        op = tensor.op  # The Op that created this Tensor.
        if op in processed_ops:
            continue

        if op.type.startswith('Sparse'):
            lambda_example = """ weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights) output = tf.keras.layers.Lambda(weights_mult)(input) """
            raise ValueError(
                'Sparse ops are not supported with functional models with built-in '
                'layer wrapping. Please wrap the sparse ops in a Lambda layer like'
                ': \n{lambda_example}\n'.format(
                    lambda_example=lambda_example))

        # Recursively set `_keras_history`.
        constant_inputs = {}
        layer_inputs = []
        for position, op_input in enumerate(list(op.inputs)):
            if uses_keras_history(op_input):
                layer_inputs.append(op_input)
                continue
            # Treat any value not originating from a `keras.Input` as
            # a constant. Variables cannot be supported.
            ds_with_session = (
                distribution_strategy_context.in_cross_replica_context()
                and not ops.executing_eagerly_outside_functions())
            using_xla = control_flow_util.GraphOrParentsInXlaContext(
                ops.get_default_graph())
            if ds_with_session or using_xla:
                # In Legacy Graph mode, evaluating here makes Session be
                # configured improperly. The downside of this is that saving
                # via `get_config` breaks, but SavedModel still works.
                constant_inputs[position] = op_input
            else:
                with ops.init_scope():
                    constant_inputs[position] = backend.function(
                        [], op_input)([])

        layer_inputs = unnest_if_single_tensor(layer_inputs)
        processed_ops, created_layers = _create_keras_history_helper(
            layer_inputs, processed_ops, created_layers)

        serialized_node = op.node_def.SerializeToString()
        op_layer = base_layer.TensorFlowOpLayer(serialized_node,
                                                constants=constant_inputs,
                                                name=op.name)
        created_layers.append(op_layer)
        op_layer._add_inbound_node(  # pylint: disable=protected-access
            layer_inputs, op.outputs)
        processed_ops.update([op])
    return processed_ops, created_layers
def _add_variable_with_custom_getter(self, name, shape=None,
                                     dtype=dtypes.float32,
                                     initializer=None, getter=None,
                                     overwrite=False, **kwargs_for_getter):
    """Restore-on-create for a variable to be saved with this `Checkpointable`.

    If the user has requested that this object or another `Checkpointable`
    which depends on this object be restored from a checkpoint (deferred
    loading before variable object creation), `initializer` may be ignored
    and the value from the checkpoint used instead.

    Args:
      name: A name for the variable. Must be unique within this object.
      shape: The shape of the variable. Passed to the getter as `None` when
        a checkpoint-derived initializer is substituted.
      dtype: The data type of the variable.
      initializer: The initializer to use. Ignored if there is a deferred
        restoration left over from a call to
        `_restore_from_checkpoint_position`.
      getter: The getter to wrap which actually fetches the variable.
      overwrite: If True, disables unique name and type checks.
      **kwargs_for_getter: Passed to the getter.

    Returns:
      The new variable object.

    Raises:
      ValueError: If the variable name is not unique.
    """
    self._maybe_initialize_checkpointable()
    with ops.init_scope():
        # When executing eagerly, a variable with a single Tensor stored in
        # the checkpoint can take that value as its initializer instead of
        # being initialized and then assigned. The preload call returns None
        # if there is nothing to restore.
        restored_initializer = (
            self._preload_simple_restoration(name=name, shape=shape)
            if context.executing_eagerly() else None)

        if restored_initializer is not None:
            # If multiple Checkpointable objects are "creating" the same
            # variable via the magic of custom getters, the one with the
            # highest restore UID (the one called last) has to make the
            # final initializer. If another custom getter interrupts this
            # process by overwriting the initializer, then we'll catch that
            # when we call _track_checkpointable. So this is "best effort"
            # to set the initializer with the highest restore UID.
            caller_is_newer = (
                isinstance(initializer, CheckpointInitialValue)
                and (initializer.restore_uid
                     > restored_initializer.restore_uid))
            if not caller_is_newer:
                initializer = restored_initializer
                shape = None

    created = getter(
        name=name, shape=shape, dtype=dtype, initializer=initializer,
        **kwargs_for_getter)

    if overwrite and not isinstance(created, CheckpointableBase):
        # TODO(allenl): Some variable types are not yet supported. Remove
        # this fallback once all get_variable() return types are
        # Checkpointable.
        return created

    # If we set an initializer and the variable processed it, tracking will
    # not assign again. It will add this variable to our dependencies, and if
    # there is a non-trivial restoration queued, it will handle that. This
    # also handles slot variables.
    return self._track_checkpointable(
        created, name=name, overwrite=overwrite)
def __init__(
    self,  # pylint: disable=super-init-not-called
    initial_value=None,
    trainable=None,
    caching_device=None,
    name=None,
    dtype=None,
    constraint=None,
    add_initializers_to=None,
    **unused_kwargs):
    """Creates a variable.

    When called while executing eagerly, construction is delegated entirely
    to `resource_variable_ops.ResourceVariable.__init__` (without
    `add_initializers_to` or `unused_kwargs`). Otherwise the handle is
    created inside `ops.init_scope()` and the initial value is either
    lifted to the outer graph, recorded in `add_initializers_to`, or
    assigned through a `cond` guarded by `var_is_initialized_op`.

    Args:
      initial_value: A `Tensor`, or Python object convertible to a `Tensor`,
        which is the initial value for the Variable. The initial value must
        have a shape specified unless `validate_shape` is set to False. Can
        also be a callable with no argument that returns the initial value
        when called. (Note that initializer functions from init_ops.py must
        first be bound to a shape before being used here.)
        NOTE(review): `validate_shape` is not a named parameter of this
        constructor; if passed it lands in `unused_kwargs` -- confirm the
        statement above still applies.
      trainable: If `True`, GradientTapes automatically watch uses of this
        Variable. `None` is treated as `True`.
      caching_device: Optional device string or function describing where
        the Variable should be cached for reading. Defaults to the
        Variable's device. If not `None`, caches on another device. Typical
        use is to cache on the device where the Ops using the Variable
        reside, to deduplicate copying through `Switch` and other
        conditional statements. Only forwarded on the eager path in this
        body.
      name: Optional name for the variable. Defaults to `'Variable'` and
        gets uniquified automatically.
      dtype: If set, initial_value will be converted to the given type. If
        None, either the datatype will be kept (if initial_value is a
        Tensor) or float32 will be used (if it is a Python object
        convertible to a Tensor).
      constraint: An optional projection function to be applied to the
        variable after being updated by an `Optimizer` (e.g. used to
        implement norm constraints or value constraints for layer weights).
        The function must take as input the unprojected Tensor representing
        the value of the variable and return the Tensor for the projected
        value (which must have the same shape). Constraints are not safe to
        use when doing asynchronous distributed training.
      add_initializers_to: if not None and not in legacy graph mode, the
        initializer tensor will be added to this map instead of adding the
        assignment to the function.
      **unused_kwargs: Accepted and never referenced in this body.

    Raises:
      ValueError: If the initial value is not specified, or if `constraint`
        is given but is not callable.
      RuntimeError: If called outside of a function definition.
        NOTE(review): nothing in this body raises RuntimeError directly --
        confirm whether a callee does.
    """
    # NOTE(review): the nesting of the `with` blocks below was reconstructed
    # from a whitespace-collapsed source -- confirm against the original
    # file.
    if context.executing_eagerly():
        # If we've been init_scope()d out of the function definition nothing
        # to do here; we can't really do the capturing or conditional logic.
        resource_variable_ops.ResourceVariable.__init__(
            self, initial_value=initial_value, trainable=trainable,
            caching_device=caching_device, name=name, dtype=dtype,
            constraint=constraint)
        return
    # Record whether the context reached through init_scope() is a graph
    # context; this flag selects the initialization strategy below.
    with ops.init_scope():
        self._in_graph_mode = not context.executing_eagerly()
    if initial_value is None:
        raise ValueError("initial_value must be specified.")
    # A callable initial value is invoked later, under the "Initializer"
    # name scope.
    init_from_fn = callable(initial_value)

    if constraint is not None and not callable(constraint):
        raise ValueError("The `constraint` argument must be a callable.")

    # Unwrap a checkpoint-provided initial value, remembering the restore
    # UID it came from.
    if isinstance(initial_value, checkpointable.CheckpointInitialValue):
        self._maybe_initialize_checkpointable()
        self._update_uid = initial_value.checkpoint_position.restore_uid
        initial_value = initial_value.wrapped_value

    if trainable is None:
        trainable = True
    self._trainable = trainable
    self._save_slice_info = None
    self._initial_value = None
    self._initializer_op = None
    self._is_initialized_op = None
    self._graph_element = None
    self._cached_value = None
    # Store the graph key so optimizers know how to only retrieve variables
    # from this graph. Guaranteed to be the same as the eager graph_key.
    self._graph_key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
    # A callable initial value is not a tensor yet, so it is not passed as
    # a value to the name scope.
    with ops.name_scope(name, "Variable", []
                        if init_from_fn else [initial_value]) as name:
        # pylint: disable=protected-access
        with ops.init_scope():
            # Suffix the scope-derived name with ops.uid() to form the
            # shared name used for the handle.
            shared_name = ops._name_from_scope_name(name)
            shared_name = "%s_%d" % (shared_name, ops.uid())
        with ops.name_scope("Initializer"), ops.device(None):
            initial_value = ops.convert_to_tensor(
                initial_value() if init_from_fn else initial_value,
                name="initial_value", dtype=dtype)
        # The handle is created inside init_scope(), unlike the initial
        # value tensor above, which is built in the current context.
        with ops.init_scope():
            self._handle = resource_variable_ops.eager_safe_variable_handle(
                shape=initial_value.get_shape(),
                dtype=initial_value.dtype.base_dtype,
                shared_name=shared_name,
                name=name,
                graph_mode=self._in_graph_mode)
        self._shape = initial_value.shape
        self._unique_id = shared_name
        self._handle_name = shared_name + ":0"
        self._dtype = initial_value.dtype.base_dtype
        self._constraint = constraint
        assert initial_value is not None
        if self._in_graph_mode:
            # Fetch the graph that is current inside init_scope() and lift
            # the initial value into it.
            with ops.init_scope():
                outer_graph = ops.get_default_graph()
            # The lift result is indexed by the original tensor; presumably
            # it maps original tensors to their lifted copies -- TODO
            # confirm.
            lifted_initializer = lift_to_graph.lift_to_graph(
                initial_value, outer_graph)[initial_value]
            with ops.init_scope():
                self._initial_value = lifted_initializer
                with ops.name_scope("IsInitialized"):
                    self._is_initialized_op = (
                        resource_variable_ops.var_is_initialized_op(
                            self._handle))
                # Always true when assertions are enabled, given the assert
                # on `initial_value` above.
                if initial_value is not None:
                    with ops.name_scope("Assign") as n, ops.colocate_with(
                        self._handle):
                        self._initializer_op = resource_variable_ops.assign_variable_op(
                            self._handle, lifted_initializer, name=n)
                with ops.name_scope("Read"), ops.colocate_with(
                    self._handle):
                    # Manually assign reads to the handle's device to avoid
                    # log messages.
                    with ops.device(self._handle.device):
                        value = self._read_variable_op()
                    self._graph_element = value
                ops.add_to_collection(ops.GraphKeys.GLOBAL_VARIABLES, self)
        else:
            if add_initializers_to is not None:
                # The caller collects initializers itself; record the
                # initial value and emit no assignment here.
                add_initializers_to[self] = initial_value
            else:
                # Emit an assignment that only runs when the variable is
                # not yet initialized.
                def assign_fn():
                    with ops.name_scope("Assign") as n, ops.colocate_with(
                        self._handle):
                        resource_variable_ops.assign_variable_op(
                            self._handle,
                            initial_value,
                            name=n)
                    # Returning values to keep tf.cond happy.
                    return ops.convert_to_tensor(1)

                def not_assign_fn():
                    return ops.convert_to_tensor(0)
                # Note: this cond is always guaranteed to run because we're
                # inside a defun which will insert automatic control
                # dependencies.
                control_flow_ops.cond(
                    resource_variable_ops.var_is_initialized_op(
                        self._handle),
                    not_assign_fn, assign_fn)

    # After the handle has been created, set up a way to clean it up when
    # executing eagerly. We'll hold the only reference to the deleter, so
    # that when this object is garbage collected the deleter will be too.
    # This means ResourceVariables can be part of reference cycles without
    # those cycles being uncollectable.
    if not self._in_graph_mode:
        self._handle_deleter = resource_variable_ops.EagerResourceDeleter(
            handle=self._handle, handle_device=self._handle.device)
    self._cached_shape_as_list = None