Example #1
0
  def container(self, container_name):
    """Returns a context manager that specifies the resource container to use.

    Overridden from `tf.Graph` to update both the init_scope container
    and the present inner container. This is necessary to make sure setting
    containers applies correctly both to created variables and to stateful
    ops.

    Args:
      container_name: container name string.

    Returns:
      A context manager for defining resource containers for stateful ops,
        yields the container name.
    """
    original_container = self._container
    # pylint: disable=protected-access
    with ops.init_scope():
      original_init_container = ops.get_default_graph()._container
    try:
      self._container = container_name
      with ops.init_scope():
        ops.get_default_graph()._container = container_name
      yield self._container
    finally:
      self._container = original_container
      with ops.init_scope():
        ops.get_default_graph()._container = original_init_container
Example #2
0
 def _real_mirrored_creator(devices, *args, **kwargs):  # pylint: disable=g-missing-docstring
   value_list = []
   for i, d in enumerate(devices):
     with ops.device(d):
       if i > 0:
         # Give replicas meaningful distinct names:
         var0name = value_list[0].name.split(":")[0]
         # We append a / to variable names created on replicas with id > 0 to
         # ensure that we ignore the name scope and instead use the given
         # name as the absolute name of the variable.
         kwargs["name"] = "%s/replica_%d/" % (var0name, i)
         # Initialize replicas with the same value:
         if context.executing_eagerly() or ops.inside_function():
           with ops.init_scope():
             kwargs["initial_value"] = array_ops.identity(
                 value_list[0].value())
         else:
           def initial_value_fn(device=d):
             with ops.device(device):
               return array_ops.identity(value_list[0].initial_value)
           kwargs["initial_value"] = initial_value_fn
       with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
         v = next_creator(*args, **kwargs)
       assert not isinstance(v, values.TPUMirroredVariable)
       value_list.append(v)
   return value_list
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    var_list = [ v for _,v in grads_and_vars]
    d_vars = []
    g_vars = []
    all_grads = [ g for g, _ in grads_and_vars ]
    for grad,var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars += [var]
        elif var in self.gan.g_vars():
            g_vars += [var]
        else:
            raise("Couldn't find var in g_vars or d_vars")

    with ops.init_scope():
        self.optimizer._create_slots([v for g,v in grads_and_vars])

    self._prepare()
    d_grads = all_grads[:len(d_vars)]
    if self.config.type == 'sga':
        Jgrads = tf.gradients(d_grads, d_vars, grad_ys=d_grads, stop_gradients=d_vars) + [tf.zeros_like(g) for g in g_vars]
    elif self.config.type == 'magnitude':
        consensus_reg = [tf.square(g) for g in d_grads if g is not None]
        Jgrads = tf.gradients(consensus_reg, d_vars) + [tf.zeros_like(g) for g in g_vars]
    else:
        consensus_reg = 0.5 * sum(
                tf.reduce_sum(tf.square(g)) for g in d_grads if g is not None
        )
        Jgrads = tf.gradients(consensus_reg, d_vars, stop_gradients=d_vars) + [tf.zeros_like(g) for g in g_vars]
    new_grads = [g+jg*self._beta if jg is not None else g for g,v,jg in zip(all_grads, var_list, Jgrads)]
    new_grads_and_vars = list(zip(new_grads, var_list)).copy()
    return self.optimizer.apply_gradients(new_grads_and_vars, global_step=global_step, name=name)
Example #4
0
  def _check_same_graph(self):
    """Checks that the module is not being connect to multiple Graphs.

    An instance of a Sonnet module 'owns' the variables it contains, and permits
    seamless variable sharing. As such, connecting a single module instance to
    multiple Graphs is not possible - this function will raise an error should
    that occur.

    Raises:
      DifferentGraphError: if the module is connected to a different Graph than
        it was previously used in.
    """
    with ops.init_scope():
      # We need `init_scope` incase we're running inside a defun. In that case
      # what we want is information about where the function will be called not
      # where the function is being built.
      current_graph = tf.get_default_graph()
      will_call_in_eager_context = tf.executing_eagerly()

    if self._graph is None:
      self._graph = current_graph
      self._set_module_info()

    if not will_call_in_eager_context:
      # Same graph checks only make sense when calling from graph mode (in eager
      # mode there is a single process level context where all modules are
      # created).
      if self._graph != current_graph:
        raise DifferentGraphError("Cannot connect module to multiple Graphs.")
Example #5
0
  def apply_gradients(self, grads_and_vars, name=None):
    """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs.
      name: Optional name for the returned operation.  Default to the name
        passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
    grads_and_vars = _filter_grads(grads_and_vars)
    var_list = [v for (_, v) in grads_and_vars]

    self._create_hypers()
    with ops.init_scope():
      self._create_slots(var_list)

    self._prepare(var_list)

    return distribute_ctx.get_replica_context().merge_call(
        self._distributed_apply, args=(grads_and_vars,), kwargs={"name": name})
Example #6
0
  def apply_gradients(self, grads_and_vars, name=None):
    """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      name: Optional name for the returned operation.  Default to the name
        passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
    grads_and_vars = _filter_grads(grads_and_vars)
    var_list = [v for (_, v) in grads_and_vars]
    if distribution_strategy_context.has_distribution_strategy():
      reduced_grads = merge_grads(grads_and_vars)
      grads_and_vars = zip(reduced_grads, var_list)

    with ops.init_scope():
      self._prepare()
      self._create_slots(var_list)
    update_ops = []

    def update_grad_to_var(grad, var):
      """Apply gradient to variable."""
      if isinstance(var, ops.Tensor):
        raise NotImplementedError("Trying to update a Tensor ", var)
      if isinstance(grad, ops.IndexedSlices):
        if var.constraint is not None:
          raise RuntimeError(
              "Cannot use a constraint function on a sparse variable.")
        return self._resource_apply_sparse_duplicate_indices(
            grad.values, var, grad.indices)
      update_op = self._resource_apply_dense(grad, var)
      if var.constraint is not None:
        with ops.control_dependencies([update_op]):
          return var.assign(var.constraint(var))
      else:
        return update_op

    with ops.name_scope(name, self._name) as name:
      for grad, var in grads_and_vars:
        scope_name = ("" if ops.executing_eagerly_outside_functions() else
                      "_" + var.op.name)
        with ops.name_scope("update" + scope_name):
          update_ops.append(update_grad_to_var(grad, var))
      # control dependencies does not work in per replica mode, please change
      # this once b/118841692 is fixed.
      # with ops.control_dependencies(update_ops):
      #   apply_updates = self._iterations.assign_add(1).op
      apply_updates = merge_update_step(update_ops, self.iterations)
      return apply_updates
Example #7
0
 def _real_mirrored_creator(devices, *args, **kwargs):  # pylint: disable=g-missing-docstring
   index = {}
   for i, d in enumerate(devices):
     with ops.init_scope(), ops.device(d):
       if i > 0:
         # Give replicas meaningful distinct names:
         var0name = index[devices[0]].name.split(":")[0]
         # We append a / to variable names created on replicas with id > 0 to
         # ensure that we ignore the name scope and instead use the given
         # name as the absolute name of the variable.
         kwargs["name"] = "%s/replica_%d/" % (var0name, i)
         # Initialize replicas with the same value:
         def initial_value_fn(device=d):
           if context.executing_eagerly():
             init_value = index[devices[0]].value()
             return array_ops.identity(init_value)
           else:
             with ops.device(device):
               init_value = index[devices[0]].initial_value
               return array_ops.identity(init_value)
         kwargs["initial_value"] = initial_value_fn
       with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
         # Don't record operations (e.g. other variable reads) during
         # variable creation.
         with tape.stop_recording():
           v = next_creator(*args, **kwargs)
       assert not isinstance(v, values.DistributedVariable)
       index[d] = v
   return index
Example #8
0
  def value_tensors(self):
    """Create value `Tensor`s for this object's attributes.

    Does not require that the Python object has been created. Used for
    restore-on-create when executing eagerly.

    Returns:
      A dictionary mapping from object attribute names to `Tensor`s.
    """
    value_tensors = {}
    for serialized_tensor in self.object_proto.attributes:
      checkpoint_key = serialized_tensor.checkpoint_key
      dtype = self._checkpoint.dtype_map[checkpoint_key]
      base_type = dtype.base_dtype
      with ops.init_scope():
        with ops.device("/cpu:0"):
          # Run the restore itself on the CPU.
          value, = io_ops.restore_v2(
              prefix=self._checkpoint.save_path,
              tensor_names=[checkpoint_key],
              shape_and_slices=[""],
              dtypes=[base_type],
              name="%s_checkpoint_read" % (serialized_tensor.name,))
        # Copy the value to the current device if necessary.
        value_tensors[serialized_tensor.name] = array_ops.identity(value)
      return value_tensors
Example #9
0
 def _capture_by_value(
     self,
     op_type,
     inputs,
     dtypes,  # pylint: disable=redefined-outer-name
     input_types=None,
     name=None,
     attrs=None,
     op_def=None,
     compute_shapes=True,
     compute_device=True):
   # When capturing by value, do the read outside
   reverse_captures = dict((v, k) for k, v in self.captures.items())
   uncaptured_inputs = [reverse_captures.get(t, t) for t in inputs]
   with ops.init_scope():
     if context.executing_eagerly():
       attr_list = ("dtype", int(attrs["dtype"].type))
       value, = execute.execute(
           compat.as_bytes(op_type), 1, uncaptured_inputs, attr_list,
           context.context())
     else:
       op = ops.get_default_graph().create_op(
           op_type, uncaptured_inputs, dtypes, input_types, name, attrs,
           op_def, compute_shapes, compute_device)
       value = op.outputs[0]
   captured_value = self.capture(value)
   return captured_value.op
def _default_getter(name, shape, dtype, initializer=None,
                    partition_info=None, **kwargs):
  """A pared-down version of get_variable which does not reuse variables."""
  dtype = dtypes.as_dtype(dtype)
  shape_object = tensor_shape.as_shape(shape)
  with ops.init_scope():
    if initializer is None:
      initializer, initializing_from_value = (
          variable_scope._get_default_variable_store()._get_default_initializer(  # pylint: disable=protected-access
              name=name, shape=shape_object, dtype=dtype))
    else:
      initializing_from_value = not callable(initializer)
    # Same logic as get_variable
    variable_dtype = dtype.base_dtype
    if initializing_from_value:
      if shape is not None:
        raise ValueError("If initializer is a constant, do not specify shape.")
      initial_value = initializer
    else:
      # Instantiate initializer if provided initializer is a type object.
      if isinstance(initializer, type(init_ops.Initializer)):
        initializer = initializer(dtype=dtype)
      def initial_value():
        return initializer(
            shape_object.as_list(), dtype=dtype, partition_info=partition_info)
    return resource_variable_ops.ResourceVariable(
        initial_value=initial_value,
        name=name,
        dtype=variable_dtype,
        **kwargs
    )
def load_function_def_library(library):
  """Load a set of functions as concrete functions without captured inputs.

  Functions names are manipulated during load such that they do not overlap
  with previously created ones.

  Args:
    library: FunctionDefLibrary proto message.

  Returns:
    Map of original function names in the library to instances of
    `ConcreteFunction` without captured inputs.

  Raises:
    ValueError: if functions dependencies have a cycle.
  """
  functions = {}

  for fdef in _sort_function_defs(library):
    copy = _fix_fdef(fdef, functions)

    func_graph = function_def_lib.function_def_to_graph(copy)
    for dep in _list_function_deps(fdef):
      functions[dep].add_to_graph(func_graph)
    func = function_lib.ConcreteFunction(func_graph)
    func.add_to_graph()

    functions[fdef.signature.name] = func

    # Also register the gradients in the current root context.
    with ops.init_scope():
      func._register_gradient()  # pylint: disable=protected-access

  return functions
Example #12
0
 def initialize_variables():
   for v, init in initializer_map.items():
     with ops.init_scope():
       if resource_variable_ops.var_is_initialized_op(v.handle):
         # Ignore variables which are already initialized at trace time.
         continue
     v.assign(lift_to_graph.lift_to_graph(
         [init], ops.get_default_graph())[init])
Example #13
0
 def __init__(self, path):
   """Record the full path to the asset."""
   # The init_scope prevents functions from capturing `path` in an
   # initialization graph, since it is transient and should not end up in a
   # serialized function body.
   with ops.init_scope(), ops.device("CPU"):
     self._path = ops.internal_convert_to_tensor(path, dtype=dtypes.string,
                                                 name="asset_path")
Example #14
0
 def _get_beta_accumulators(self):
   with ops.init_scope():
     if context.executing_eagerly():
       graph = None
     else:
       graph = ops.get_default_graph()
     return (self._get_non_slot_variable("beta1_power", graph=graph),
             self._get_non_slot_variable("beta2_power", graph=graph))
Example #15
0
def create_file_writer_v2(logdir,
                          max_queue=None,
                          flush_millis=None,
                          filename_suffix=None,
                          name=None):
  """Creates a summary file writer for the given log directory.

  Args:
    logdir: a string specifying the directory in which to write an event file.
    max_queue: the largest number of summaries to keep in a queue; will
     flush once the queue gets bigger than this. Defaults to 10.
    flush_millis: the largest interval between flushes. Defaults to 120,000.
    filename_suffix: optional suffix for the event file name. Defaults to `.v2`.
    name: a name for the op that creates the writer.

  Returns:
    A SummaryWriter object.
  """
  if logdir is None:
    raise ValueError("logdir cannot be None")
  inside_function = ops.inside_function()
  with ops.name_scope(name, "create_file_writer") as scope, ops.device("cpu:0"):
    # Run init inside an init_scope() to hoist it out of tf.functions.
    with ops.init_scope():
      if context.executing_eagerly():
        _check_create_file_writer_args(
            inside_function,
            logdir=logdir,
            max_queue=max_queue,
            flush_millis=flush_millis,
            filename_suffix=filename_suffix)
      logdir = ops.convert_to_tensor(logdir, dtype=dtypes.string)
      if max_queue is None:
        max_queue = constant_op.constant(10)
      if flush_millis is None:
        flush_millis = constant_op.constant(2 * 60 * 1000)
      if filename_suffix is None:
        filename_suffix = constant_op.constant(".v2")
      # Prepend the PID and a process-local UID to the filename suffix to avoid
      # filename collisions within the machine (the filename already contains
      # the hostname to avoid cross-machine collisions).
      unique_prefix = constant_op.constant(".%s.%s" % (os.getpid(), ops.uid()))
      filename_suffix = unique_prefix + filename_suffix
      # Use a unique shared_name to prevent resource sharing.
      if context.executing_eagerly():
        shared_name = context.shared_name()
      else:
        shared_name = ops.name_from_scope_name(scope)  # pylint: disable=protected-access
      return ResourceSummaryWriter(
          shared_name=shared_name,
          init_op_fn=functools.partial(
              gen_summary_ops.create_summary_file_writer,
              logdir=logdir,
              max_queue=max_queue,
              flush_millis=flush_millis,
              filename_suffix=filename_suffix),
          name=name,
          v2=True)
Example #16
0
  def __init__(self, dist, coord, replica_id, device_map, variable_creator_fn,
               fn, args, kwargs):
    super(_MirroredReplicaThread, self).__init__()
    self.coord = coord
    self.distribution = dist
    self.device_map = device_map
    self.replica_id = replica_id
    self.variable_creator_fn = variable_creator_fn
    # State needed to run and return the results of `fn`.
    self.main_fn = fn
    self.main_args = args
    self.main_kwargs = kwargs
    self.main_result = None
    self.done = False
    # State needed to run the next merge_call() (if any) requested via
    # ReplicaContext.
    self.merge_fn = None
    self.merge_args = None
    self.merge_kwargs = None
    self.merge_result = None
    self.captured_name_scope = None
    self.captured_var_scope = None
    # We use a thread.Event for the main thread to signal when this
    # thread should start running (`should_run`), and another for
    # this thread to transfer control back to the main thread
    # (`has_paused`, either when it gets to a
    # `get_replica_context().merge_call` or when `fn` returns). In
    # either case the event starts cleared, is signaled by calling
    # set(). The receiving thread waits for the signal by calling
    # wait() and then immediately clearing the event using clear().
    self.should_run = threading.Event()
    self.has_paused = threading.Event()
    # These fields have to do with inheriting various contexts from the
    # parent thread:
    ctx = context.context()
    self.in_eager = ctx.executing_eagerly()
    self.record_thread_local_context_fields()
    # pylint: disable=protected-access
    if not ctx._context_handle:
      ctx._initialize_handle_and_devices()
    self.context_device_policy = (
        pywrap_tensorflow.TFE_ContextGetDevicePlacementPolicy(
            ctx._context_handle))
    self.graph = ops.get_default_graph()
    with ops.init_scope():
      self._init_in_eager = context.executing_eagerly()
      self._init_graph = ops.get_default_graph()

    self._variable_creator_stack = self.graph._variable_creator_stack[:]
    self._var_scope = variable_scope.get_variable_scope()
    # Adding a "/" at end lets us re-enter this scope later.
    self._name_scope = self.graph.get_name_scope()
    if self._name_scope:
      self._name_scope += "/"
    if self.replica_id > 0:
      if not self._name_scope:
        self._name_scope = ""
      self._name_scope += "replica_%d/" % self.replica_id
 def add_var(x):
   if not v_holder:
     v = variables.Variable([1., 2.])
     v_holder.append(v)
     already_initialized = variables.Variable(3.)
     with ops.init_scope():
       already_initialized.assign(10.)
     v_holder.append(already_initialized)
   return v_holder[0] + v_holder[1] + x
Example #18
0
 def restore(self, checkpointable):
   """Restore this value into `checkpointable`."""
   with ops.init_scope():
     if self.bind_object(checkpointable):
       # This object's correspondence with a checkpointed object is new, so
       # process deferred restorations for it and its dependencies.
       restore_ops = checkpointable._restore_from_checkpoint_position(self)  # pylint: disable=protected-access
       if restore_ops:
         self._checkpoint.restore_ops.extend(restore_ops)
Example #19
0
 def init_fn():
   self.assertTrue(context.executing_eagerly())
   with ops.init_scope():
     self.assertTrue(context.executing_eagerly())
     context_switches = context.context().context_switches
     self.assertEqual(len(context_switches.stack), 1)
     self.assertFalse(context_switches.stack[0].is_building_function)
     self.assertEqual(context_switches.stack[0].enter_context_fn,
                      context.eager_mode)
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    var_list = [ v for _,v in grads_and_vars]
    with ops.init_scope():
        zt = [self._get_or_make_slot(v, v, "zt", self._name) for _,v in grads_and_vars]
        slots_list = []
        for name in self.optimizer.get_slot_names():
            for var in self.optimizer.variables():
                self._get_or_make_slot(var, var, "zt", "zt")
    self._prepare()

    def _name(post, s):
        ss = s.split(":")
        return ss[0] + "_" + post + "_dontsave"
    zt = [self.get_slot(v, "zt") for _,v in grads_and_vars]
    xt = [tf.Variable(v, name=_name("gigaxt",v.name)) for _,v in grads_and_vars]
    tmp = [tf.Variable(v, name=_name("gigatmp",v.name)) for _,v in grads_and_vars]
    xslots_list = []
    zslots_list = []
    tmpslots_list = []
    slots_vars = []
    for name in self.optimizer.get_slot_names():
        for var in self.optimizer.variables():
            slots_vars += [var]
            xslots_list.append(tf.Variable(var))
            zslots_list.append(self._get_or_make_slot(var, var, "zt", "zt"))
            tmpslots_list.append(tf.Variable(var, name=_name("gigaslottmp", var.name)))


    restored_vars = var_list + slots_vars
    zt_vars = zt + zslots_list
    xt_vars = xt + xslots_list
    tmp_vars = tmp + tmpslots_list
    all_grads = [ g for g, _ in grads_and_vars ]
    # store variables for resetting

    op1 = tf.group(*[tf.assign(w, v) for w,v in zip(tmp_vars, restored_vars)]) # store tmp_vars

    with tf.get_default_graph().control_dependencies([op1]):
        op2 = self.optimizer.apply_gradients(grads_and_vars.copy(), global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op2]):
            op3 = tf.group(*[tf.assign(w, v) for w,v in zip(xt_vars, restored_vars)]) # store xt^+1 in xt_vars
            with tf.get_default_graph().control_dependencies([op3]):
                op4 = tf.group(*[tf.assign(w, v) for w,v in zip(restored_vars, zt_vars)]) # restore vars to zt (different weights)
                with tf.get_default_graph().control_dependencies([op4]):
                    op5 = self.optimizer2.apply_gradients(grads_and_vars.copy(), global_step=global_step, name=name) # zt+1
                    with tf.get_default_graph().control_dependencies([op5]):
                        zt1_xt1 = [_restored_vars - _xt1_vars for _restored_vars, _xt1_vars in zip(restored_vars, xt_vars)]
                        St1 = [tf.minimum(1.0, tf.norm(_zt1_vars-_zt_vars) / tf.norm(_zt1_xt1)) for _zt1_vars, _zt_vars, _zt1_xt1 in zip(restored_vars, zt_vars, zt1_xt1)]
                        self.gan.add_metric('st1',tf.reduce_mean(tf.add_n(St1)/len(St1)))
                        #self.gan.add_metric('xzt1',tf.norm(xt_vars[0]-zt_vars[0]))
                        nextw = [_xt_t1 + _St1 * _zt1_xt1 for _xt_t1, _St1, _zt1_xt1 in zip(xt_vars, St1, zt1_xt1)]
                        op6 = tf.group(*[tf.assign(w, v) for w,v in zip(zt_vars, restored_vars)]) # set zt+1
                        with tf.get_default_graph().control_dependencies([op6]):
                            op7 = tf.group(*[tf.assign(w, v) for w,v in zip(restored_vars, nextw)]) # set xt+1
                            with tf.get_default_graph().control_dependencies([op7]):
                                return tf.no_op()
Example #21
0
    def create_variable():
      with ops.init_scope():
        initial_value = random_ops.random_uniform(
            (2, 2), maxval=1000000, dtype=dtypes.int64)

      if not a:
        with ops.device("CPU:0"):
          a.append(resource_variable_ops.ResourceVariable(initial_value))

      return a[0].read_value()
Example #22
0
def _get_unique_name(name):
  """Returns a name that is unique in the root graph of `func_graph`.

  Args:
    name: String to uniquify.

  Returns:
    A string.
  """
  with ops.init_scope():
    return ops.get_default_graph().unique_name(name)
Example #23
0
  def load(self, tags):
    """Creates an object from the MetaGraph identified by `tags`."""
    meta_graph_def = self.get_meta_graph_def_from_tags(tags)
    load_graph_returns = [None]
    wrapped = wrap_function.wrap_function(
        functools.partial(self.load_graph, load_graph_returns, meta_graph_def),
        signature=[])
    saver, = load_graph_returns
    self.restore_variables(wrapped, saver)
    with wrapped.graph.as_default():
      init_op = loader_impl.get_init_op(
          meta_graph_def) or monitored_session.Scaffold.default_local_init_op()
      # Add a dummy Tensor we know we can fetch to add control dependencies to.
      init_anchor = constant_op.constant(0., name="dummy_fetch")

    root = tracking.AutoTrackable()
    asset_feed_tensors = []
    asset_paths = []
    for tensor_name, value in loader_impl.get_asset_tensors(
        self._export_dir, meta_graph_def).items():
      asset_feed_tensors.append(wrapped.graph.as_graph_element(tensor_name))
      asset_paths.append(tracking.TrackableAsset(value))
    init_fn = wrapped.prune(
        feeds=asset_feed_tensors,
        fetches=[init_anchor, wrapped.graph.as_graph_element(init_op)])
    initializer = _Initializer(init_fn, asset_paths)
    # pylint: disable=protected-access
    local_init_op, _ = initializer._initialize()
    # pylint: enable=protected-access
    with ops.init_scope():
      if not context.executing_eagerly():
        ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, local_init_op)
        for variable in wrapped.graph.get_collection_ref(
            ops.GraphKeys.LOCAL_VARIABLES):
          # pylint: disable=protected-access
          variable._initializer_op = local_init_op
          # pylint: enable=protected-access
    root.initializer = initializer
    root.asset_paths = asset_paths
    signature_functions = self._extract_signatures(wrapped, meta_graph_def)

    root.signatures = signature_serialization.create_signature_map(
        signature_functions)
    root.variables = list(wrapped.graph.variables)
    root.tensorflow_version = (
        meta_graph_def.meta_info_def.tensorflow_version)
    root.tensorflow_git_version = (
        meta_graph_def.meta_info_def.tensorflow_git_version)
    root.graph = wrapped.graph
    root.prune = wrapped.prune
    return root
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    var_list = [ v for _,v in grads_and_vars]
    d_vars = []
    g_vars = []
    for grad,var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars += [var]
        elif var in self.gan.g_vars():
            g_vars += [var]
        else:
            raise("Couldn't find var in g_vars or d_vars")

    with ops.init_scope():
        v1 = [self._zeros_slot(v, "v1", self._name) for _,v in grads_and_vars]
        if self.config.include_slots:
            for name in self.optimizer.get_slot_names():
                for var in self.optimizer.variables():
                    self._zeros_slot(var, "pm", "pm")
    self._prepare()

    v1 = [self.get_slot(v, "v1") for _,v in grads_and_vars]
    slots_list = []
    slots_vars = []
    if self.config.include_slots:
        for name in self.optimizer.get_slot_names():
            for var in self.optimizer.variables():
                slots_vars += [var]
                slots_list.append(self._zeros_slot(var, "pm", "pm"))


    current_vars = var_list + slots_vars
    tmp_vars = v1 + slots_list
    all_grads = [ g for g, _ in grads_and_vars ]

    op1 = tf.group(*[tf.assign(w, v) for w,v in zip(tmp_vars, current_vars)]) # store variables

    with tf.get_default_graph().control_dependencies([op1]):
        # store g2
        #op3 = tf.group(*[tf.assign_sub(v, self._lr_t*grad) for grad,v in grads_and_vars])
        op3 = self.optimizer.apply_gradients(grads_and_vars.copy(), global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op3]):

            def pmcombine(_v1,_v2):
                return _v2 + (_v2 - _v1)

            combined = [pmcombine(_v1, _v2) for _v1, _v2 in zip(tmp_vars, current_vars)]
            # restore v1, slots
            op5 = tf.group(*[ tf.assign(w,v) for w,v in zip(current_vars, combined)])
            with tf.get_default_graph().control_dependencies([op5]):
                return tf.no_op()
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    d_vars = []
    g_vars = []
    for grad,var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars += [var]
        elif var in self.gan.g_vars():
            g_vars += [var]
        else:
            raise Exception("Couldn't find var in g_vars or d_vars")

    if self.config.apply_on == "discriminator":
        depth_vars = d_vars
    else:
        depth_vars = d_vars + g_vars
    with ops.init_scope():
        [self._get_or_make_slot(v, v, "depth", self.name) for v in depth_vars]
        self.optimizer._create_slots([v for g,v in grads_and_vars])
        for name in self.optimizer.get_slot_names():
            for var in self.optimizer.variables():
                self._zeros_slot(var, "depth", self.name)

    self._prepare()
    depth_slots = [self.get_slot(v, "depth") for v in depth_vars]
    for name in self.optimizer.get_slot_names():
        for var in self.optimizer.variables():
            depth_vars += [var]
            depth_slots += [self._zeros_slot(var, "depth", self.name)]

    def calculate_depth(grads_and_vars_k,k=0):
        if(k == 0):
            return tf.group(*[tf.assign(v,nv) for v,nv in zip(depth_vars, depth_slots)])

        op2 = self.optimizer.apply_gradients(grads_and_vars_k, global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op2]):
            w_k_combined = [self._decay *w_k_1 + (1.-self._decay)*w_hat for w_hat, w_k_1 in zip(depth_slots, depth_vars)]
            op3 = tf.group(*[tf.assign(w, v) for w,v in zip(depth_slots, w_k_combined)]) # store variables
            with tf.get_default_graph().control_dependencies([op3]):
                d_loss, g_loss = self.gan.loss.sample
                d_grads = tf.gradients(d_loss, d_vars)
                g_grads = tf.gradients(g_loss, g_vars)
                grads_k_1 = d_grads + g_grads
                grads_and_vars_k_1 = list(zip(grads_k_1,depth_vars)).copy()
                return calculate_depth(grads_and_vars_k_1,k-1)

    op1 = tf.group(*[tf.assign(w, v) for w,v in zip(depth_slots, depth_vars)]) # store variables
    with tf.get_default_graph().control_dependencies([op1]):
        opd = calculate_depth(grads_and_vars, self._depth)
        with tf.get_default_graph().control_dependencies([opd]):
            return tf.no_op()
Example #26
0
 def _get_tensor_from_node(self, node_id):
   """Resolves a node id into a tensor to be captured for a function."""
   with ops.init_scope():
     obj = self._nodes[node_id]
     if resource_variable_ops.is_resource_variable(obj):
       return obj.handle
     elif isinstance(obj, tracking.TrackableAsset):
       return obj.asset_path
     elif tensor_util.is_tensor(obj):
       return obj
     elif isinstance(obj, tracking.CapturableResource):
       # Note: this executes restored functions in the CapturableResource.
       return obj.resource_handle
     raise ValueError("Can't convert node %s to tensor" % (type(obj)))
Example #27
0
  def _initialize_uninitialized_variables(self, initializer_map):
    """Make and call a `ConcreteFunction` which initializes variables."""

    # Note: using defun here avoids an infinite recursion.
    @function_lib.defun
    def initialize_variables():
      for v, init in initializer_map.items():
        with ops.init_scope():
          if resource_variable_ops.var_is_initialized_op(v.handle):
            # Ignore variables which are already initialized at trace time.
            continue
        v.assign(lift_to_graph.lift_to_graph(
            init, ops.get_default_graph())[init])

    with ops.init_scope():
      return initialize_variables.get_concrete_function()()
  def _init_from_args(self, name, shared_name):  # pylint: disable=invalid-name
    """Initialize the CriticalSection from constructor arguments."""
    with ops.name_scope(name, "CriticalSection", []) as name:
      with ops.init_scope():
        # pylint: disable=protected-access
        container = ops.get_default_graph()._container
        # pylint: enable=protected-access
        if shared_name is None:
          shared_name = name
        if container is None:
          container = ""
        self._handle = gen_resource_variable_ops.mutex_v2(
            shared_name=shared_name, container=container, name=name)

    if not context.executing_eagerly():
      ops.add_to_collections(CRITICAL_SECTIONS, self)
Example #29
0
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    all_vars = [ v for _,v in grads_and_vars]
    d_vars = []
    g_vars = []
    all_grads = [ g for g, _ in grads_and_vars ]
    for grad,var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars += [var]
        elif var in self.gan.g_vars():
            g_vars += [var]
        else:
            raise("Couldn't find var in g_vars or d_vars")

    with ops.init_scope():
        self.optimizer._create_slots([v for g,v in grads_and_vars])
    self._prepare()

    d_grads = all_grads[:len(d_vars)]
    g_grads = all_grads[len(d_vars):]
    if self.config.finite_differences:
        return self.finite_differences(grads_and_vars, global_step, name, d_vars, g_vars, d_grads, g_grads)
    dc_grads = sum([tf.reduce_sum(tf.square(d)) for d in d_grads])
    gc_grads = sum([tf.reduce_sum(tf.square(g)) for g in g_grads])
    gamma12 = tf.gradients(gc_grads, d_vars) + [tf.zeros_like(g) for g in g_vars]
    gamma21 = [tf.zeros_like(d) for d in d_vars] + tf.gradients(dc_grads, g_vars)

    gamma12 = [ tf.zeros_like(ddg) if _dg is None else _dg for ddg, _dg in zip(all_vars, gamma12) ]
    gamma21 = [ tf.zeros_like(ddg) if _dg is None else _dg for ddg, _dg in zip(all_vars, gamma21) ]
    __gamma12 = [ tf.reduce_sum(_gamma12) for _gamma12 in gamma12 ]
    __gamma21 = [ tf.reduce_sum(_gamma21) for _gamma21 in gamma21 ]
    #gamma12_metric = self.gan.ops.squash(sum(gamma12))
    gamma12_metric = self.gan.ops.squash(sum(__gamma12))
    self.gan.add_metric('gamma12', gamma12_metric)
    gamma21_metric = self.gan.ops.squash(sum(__gamma21))
    self.gan.add_metric('gamma21', gamma21_metric)
   
    new_grads = []
    for _gamma12, _gamma21, _grads in zip(gamma12, gamma21, all_grads):
        Eo = _grads - \
             0.5*self._alpha*_gamma21 +\
             0.5*self._alpha*_gamma12
        new_grads += [ Eo ]

    new_grads_and_vars = list(zip(new_grads, all_vars)).copy()

    return self.optimizer.apply_gradients(new_grads_and_vars, global_step=global_step, name=name)
Example #30
0
  def _initialize_uninitialized_variables(self, initializer_map):
    """Make and call a `ConcreteFunction` which initializes variables."""

    # Note: using defun here avoids an infinite recursion.
    # Note: there is no reason not to autograph once the overhead is negligible.
    @function_lib.defun(autograph=False)  # tf.function internal, pure graph
    def initialize_variables():
      for v, init in initializer_map.items():
        with ops.init_scope():
          if resource_variable_ops.var_is_initialized_op(v.handle):
            # Ignore variables which are already initialized at trace time.
            continue
        v.assign(lift_to_graph.lift_to_graph(
            [init], ops.get_default_graph())[init])

    with ops.init_scope():
      return initialize_variables.get_concrete_function()()
Example #31
0
def make_variable(name,
                  shape=None,
                  dtype=dtypes.float32,
                  initializer=None,
                  trainable=None,
                  caching_device=None,
                  validate_shape=True,
                  constraint=None,
                  use_resource=None,
                  collections=None,
                  synchronization=tf_variables.VariableSynchronization.AUTO,
                  aggregation=tf_variables.VariableAggregation.NONE,
                  partitioner=None):  # pylint: disable=unused-argument
    """Temporary util to create a variable (relies on `variable_scope.variable`).

  Some reuse-related technicalities prevent us from using
  `variable_scope.get_variable()` directly, so we use a subcomponent
  that has fewer constraints (`variable_scope.variable()`).

  In the longer term, it seems like a similar "default variable creator" method
  should exist in `Trackable` instead. When this happens, we can get
  rid of this temporary solution.

  TODO(fchollet): remove this method when no longer needed.

  Arguments:
    name: Variable name.
    shape: Variable shape.
    dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
    initializer: Initializer instance (callable).
    trainable: Whether the variable should be part of the layer's
      "trainable_variables" (e.g. variables, biases)
      or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
      Note, if the current variable scope is marked as non-trainable
      then this parameter is ignored and any added variables are also
      marked as non-trainable. `trainable` defaults to `True` unless
      `synchronization` is set to `ON_READ`.
    caching_device: Passed to `tf.Variable`.
    validate_shape: Passed to `tf.Variable`.
    constraint: Constraint instance (callable).
    use_resource: Whether to use a `ResourceVariable`.
    collections: List of graph collections keys. The new variable is added to
      these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
    synchronization: Indicates when a distributed a variable will be
      aggregated. Accepted values are constants defined in the class
      `tf.VariableSynchronization`. By default the synchronization is set to
      `AUTO` and the current `DistributionStrategy` chooses
      when to synchronize. If `synchronization` is set to `ON_READ`,
      `trainable` must not be set to `True`.
    aggregation: Indicates how a distributed variable will be aggregated.
      Accepted values are constants defined in the class
      `tf.VariableAggregation`.
    partitioner: Not handled at this time.

  Returns:
    Variable instance.
  """
    initializing_from_value = False
    if initializer is not None and not callable(initializer):
        initializing_from_value = True

    with ops.init_scope():
        if initializing_from_value:
            init_val = initializer
            variable_dtype = None
        else:
            # Instantiate initializer if provided initializer is a type object.
            if isinstance(
                    initializer,
                (type(init_ops.Initializer), type(init_ops_v2.Initializer))):
                initializer = initializer()
            init_val = lambda: initializer(shape, dtype=dtype)
            variable_dtype = dtype.base_dtype
    if use_resource is None:
        use_resource = True

    # TODO(apassos,rohanj) figure out how to remove collections from here so we
    # can remove the V1.
    variable_shape = tensor_shape.TensorShape(shape)
    return tf_variables.VariableV1(
        initial_value=init_val,
        name=name,
        trainable=trainable,
        caching_device=caching_device,
        dtype=variable_dtype,
        validate_shape=validate_shape,
        constraint=constraint,
        use_resource=use_resource,
        collections=collections,
        synchronization=synchronization,
        aggregation=aggregation,
        shape=variable_shape if variable_shape else None)
Example #32
0
def func_graph_from_py_func(name,
                            python_func,
                            args,
                            kwargs,
                            signature=None,
                            func_graph=None,
                            experimental_autograph=False,
                            add_control_dependencies=True,
                            arg_names=None,
                            op_return_value=None):
    """Returns a `FuncGraph` generated from `python_func`.

  Args:
    name: an identifier for the function.
    python_func: the Python function to trace.
    args: the positional args with which the Python function should be called;
      ignored if a signature is provided.
    kwargs: the keyword args with which the Python function should be called;
      ignored if a signature is provided.
    signature: a possibly nested sequence of `TensorSpecs` specifying the shapes
      and dtypes of the arguments. When a signature is provided, `args` and
      `kwargs` are ignored, and `python_func` is traced with Tensors conforming
      to `signature`. If `None`, the shapes and dtypes are inferred from the
      inputs.
    func_graph: Optional. An instance of FuncGraph. If provided, we will use
      this graph else a new one is built and returned.
    experimental_autograph: whether to use autograph to compile `python_func`.
      See https://www.tensorflow.org/guide/autograph for more information.
    add_control_dependencies: If True, automatically adds control dependencies
      to ensure program order matches execution order and stateful ops always
      execute.
    arg_names: Optional list of argument names, used to give input placeholders
      recognizable names.
    op_return_value: Optional. A Tensor. If set and `python_func` returns
      Operations, those return values will be replaced with this value. If not
      set, returning an Operation triggers an error.

  Returns:
    A FuncGraph.

  Raises:
    TypeError: If any of `python_func`'s return values is neither `None` nor a
      `Tensor`.
  """
    if op_return_value is not None:
        assert isinstance(op_return_value, ops.Tensor), op_return_value
    if func_graph is None:
        func_graph = FuncGraph(name)
    assert isinstance(func_graph, FuncGraph)
    if add_control_dependencies:
        control_manager = AutomaticControlDependencies
    else:
        control_manager = ops.NullContextmanager
    with func_graph.as_default(), control_manager() as a:
        current_scope = variable_scope.get_variable_scope()
        default_use_recource = current_scope.use_resource
        current_scope.set_use_resource(True)

        if signature is not None:
            args = signature
            kwargs = {}

        func_args = _get_defun_inputs_from_args(args, arg_names)
        func_kwargs = _get_defun_inputs_from_kwargs(kwargs)

        # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
        # Variables to help check whether mutation happens in calling the function
        # Copy the recursive list, tuple and map structure, but not base objects
        func_args_before = nest.pack_sequence_as(func_args,
                                                 nest.flatten(func_args))
        func_kwargs_before = nest.pack_sequence_as(func_kwargs,
                                                   nest.flatten(func_kwargs))

        def convert(x):
            """Converts a function output to a Tensor."""
            if x is None:
                return None
            if op_return_value is not None and isinstance(x, ops.Operation):
                # TODO(b/79881896): we currently can't capture external control deps, so
                # this won't work if x needs to be captured (i.e. if python_func returns
                # captured Operations).
                with ops.control_dependencies([x]):
                    x = array_ops.identity(op_return_value)
            else:
                try:
                    x = ops.convert_to_tensor_or_indexed_slices(x)
                except (ValueError, TypeError):
                    raise TypeError(
                        "To be compatible with tf.contrib.eager.defun, Python functions "
                        "must return zero or more Tensors; in compilation of %s, found "
                        "return value of type %s, which is not a Tensor." %
                        (str(python_func), type(x)))
            if add_control_dependencies:
                x = a.mark_as_return(x)
            return x

        this_tape = tape.push_new_tape()
        try:
            if experimental_autograph:
                from tensorflow.python import autograph  # pylint: disable=g-import-not-at-top
                func_outputs = autograph.converted_call(
                    python_func, None,
                    autograph.ConversionOptions(
                        verbose=True,
                        recursive=True,
                        strip_decorators=(function.defun, ),
                        optional_features=(),
                    ), *func_args, **func_kwargs)
            else:
                func_outputs = python_func(*func_args, **func_kwargs)
            # invariant: `func_outputs` contains only Tensors and `None`s.
            func_outputs = nest.map_structure(convert, func_outputs)

            check_mutation(func_args_before, func_args)
            check_mutation(func_kwargs_before, func_kwargs)
        finally:
            tape.pop_tape(this_tape)
            current_scope.set_use_resource(default_use_recource)

        # Variables in `func_args`, `func_kwargs` should be explicit inputs
        # to the function, not captured inputs.
        tape_variables = this_tape.watched_variables()
        arg_variables = set()
        inputs = []
        for arg in nest.flatten(func_args) + nest.flatten(func_kwargs):
            if isinstance(arg, resource_variable_ops.ResourceVariable):
                try:
                    resource_placeholder = func_graph.captures.pop(arg.handle)
                    arg_variables.add(arg)
                except KeyError:
                    # This case occurs if a Variable among the inputs is not actually
                    # used by the function; we still add an explicit input for it
                    # because the user should presumably pass the Variable as an input
                    # to the corresponding graph function.
                    resource_placeholder = _create_substitute_placeholder(
                        arg.handle)
                inputs.append(resource_placeholder)
            elif isinstance(arg, ops.Tensor):
                inputs.append(arg)
        variables = [v for v in tape_variables if v not in arg_variables]
        func_graph.inputs = inputs + list(func_graph.captures.values())

        func_graph.structured_outputs = func_outputs
        # Returning a closed-over tensor does not trigger convert_to_tensor.
        func_graph.outputs.extend(
            func_graph.capture(x)
            for x in flatten(func_graph.structured_outputs) if x is not None)

        func_graph.variables = variables

    # Register any other functions defined in the graph.
    with ops.init_scope():
        if context.executing_eagerly():
            for f in func_graph._functions.values():  # pylint: disable=protected-access
                # TODO(ashankar): What about the gradient registry?
                context.add_function(f._c_func.func)  # pylint: disable=protected-access

    return func_graph
Example #33
0
def _create_keras_history_helper(tensors, processed_ops, created_layers):
    """Helper method for `create_keras_history`.

  Args:
    tensors: A structure of Tensors for which to create Keras metadata.
    processed_ops: Set. TensorFlow operations that have already been wrapped in
      `TensorFlowOpLayer` instances.
    created_layers: List. The `TensorFlowOpLayer` instances created.

  Returns:
    Tuple. First element is the updated set of TensorFlow Operations that
    have been wrapped in `TensorFlowOpLayer` instances. Second element is
    a list of the `TensorFlowOpLayer` instances created.
  """
    if ops.executing_eagerly_outside_functions():
        raise ValueError(
            '`create_keras_history` should only be called if eager is disabled!'
        )
    # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
    # Cannot be imported at top because of circular dependencies.
    # TODO(omalleyt): Resolve circular dependency.
    from tensorflow.python.keras.engine import base_layer  # pylint: disable=g-import-not-at-top
    tensor_list = nest.flatten(tensors)
    sparse_ops = []
    ragged_tensors = []
    for tensor in tensor_list:
        if getattr(tensor, '_keras_history', None) is not None:
            continue
        if isinstance(
                tensor,
            (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)):
            sparse_ops.append(tensor.op)
            continue
        if tf_utils.is_ragged(tensor):
            # Ragged tensors don't have an op property
            ragged_tensors.append(tensor)
            continue
        op = tensor.op  # The Op that created this Tensor.
        if op not in processed_ops:
            # Recursively set `_keras_history`.
            op_inputs = list(op.inputs)
            constants = {}
            layer_inputs = []
            for i, op_input in enumerate(op_inputs):
                if uses_keras_history(op_input):
                    layer_inputs.append(op_input)
                else:
                    # Treat any value not originating from a `keras.Input` as
                    # a constant. Variables cannot be supported.
                    ds_with_session = (
                        distribution_strategy_context.in_cross_replica_context(
                        ) and not ops.executing_eagerly_outside_functions())
                    using_xla = control_flow_util.GraphOrParentsInXlaContext(
                        ops.get_default_graph())
                    if ds_with_session or using_xla or _UNSAFE_GRAPH_OP_LAYER_CREATION:
                        # In Legacy Graph mode, evaluating here makes Session be
                        # configured improperly. The downside of this is that saving
                        # via `get_config` breaks, but SavedModel still works.
                        constants[i] = op_input
                    else:
                        with ops.init_scope():
                            constants[i] = backend.function([], op_input)([])
            layer_inputs = unnest_if_single_tensor(layer_inputs)
            processed_ops, created_layers = _create_keras_history_helper(
                layer_inputs, processed_ops, created_layers)
            name = op.name
            node_def = op.node_def.SerializeToString()
            op_layer = base_layer.TensorFlowOpLayer(node_def,
                                                    constants=constants,
                                                    name=name)
            created_layers.append(op_layer)
            op_layer._set_connectivity_metadata(  # pylint: disable=protected-access
                args=(layer_inputs, ),
                kwargs={},
                outputs=op.outputs)
            processed_ops.update([op])
    if sparse_ops or ragged_tensors:
        lambda_example = """
    weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights)
    output = tf.keras.layers.Lambda(weights_mult)(input)
    """
        raise ValueError(
            'Tensorflow ops that generate ragged or sparse tensor '
            'outputs are currently not supported by Keras automatic '
            'op wrapping. Please wrap these ops in a Lambda layer: '
            '\n\n```\n{example}\n```\n'
            'Sparse ops encountered: {sparse_ops}\n'
            'Ragged tensors encountered: {ragged_tensors}\n'.format(
                example=lambda_example,
                sparse_ops=str(sparse_ops),
                ragged_tensors=str(ragged_tensors)))
    return processed_ops, created_layers
Example #34
0
def load(export_dir, tags=None):
    """Load a SavedModel from `export_dir`.

  Signatures associated with the SavedModel are available as functions:

  ```python
  imported = tf.saved_model.load(path)
  f = imported.signatures["serving_default"]
  print(f(x=tf.constant([[1.]])))
  ```

  Objects exported with `tf.saved_model.save` additionally have checkpointable
  objects and functions assigned to attributes:

  ```python
  exported = tf.train.Checkpoint(v=tf.Variable(3.))
  exported.f = tf.function(
      lambda x: exported.v * x,
      input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)])
  tf.saved_model.save(exported, path)
  imported = tf.saved_model.load(path)
  assert 3. == imported.v.numpy()
  assert 6. == imported.f(x=tf.constant(2.)).numpy()
  ```

  Args:
    export_dir: The SavedModel directory to load from.
    tags: A tag or sequence of tags identifying the MetaGraph to load. Optional
      if the SavedModel contains a single MetaGraph, as for those exported from
      `tf.saved_model.load`.

  Returns:
    A checkpointable object with a `signatures` attribute mapping from signature
    keys to functions. If the SavedModel was exported by `tf.saved_model.load`,
    it also points to checkpointable objects and functions which were attached
    to the exported object.

  Raises:
    ValueError: If `tags` don't match a MetaGraph in the SavedModel.
  """
    if tags is not None:
        # Supports e.g. tags=SERVING and tags=[SERVING]
        tags = nest.flatten(tags)
    saved_model_proto = loader_impl.parse_saved_model(export_dir)
    object_graph_filename = os.path.join(
        compat.as_bytes(export_dir),
        compat.as_bytes(constants.EXTRA_ASSETS_DIRECTORY),
        compat.as_bytes("object_graph.pb"))
    if (file_io.file_exists(object_graph_filename)
            and len(saved_model_proto.meta_graphs) == 1):
        meta_graph_def = saved_model_proto.meta_graphs[0]
        if (tags is not None
                and set(tags) != set(meta_graph_def.meta_info_def.tags)):
            raise ValueError((
                "The SavedModel at {} has one MetaGraph with tags {}, but got an "
                "incompatible argument tags={} to tf.saved_model.load. You may omit "
                "it, pass 'None', or pass matching tags.").format(
                    export_dir, meta_graph_def.meta_info_def.tags, tags))
        object_graph_proto = _load_saved_object_graph_proto(
            object_graph_filename)
        with ops.init_scope():
            loader = _Loader(object_graph_proto, saved_model_proto, export_dir)
            root = loader.get(0)
    else:
        with ops.init_scope():
            root = load_v1_in_v2.load(export_dir, tags)
    return root
Example #35
0
def maybe_init_scope():
    if ops.executing_eagerly_outside_functions():
        yield
    else:
        with ops.init_scope():
            yield
Example #36
0
    def finite_differences(self, grads_and_vars, global_step, name, d_vars,
                           g_vars, d_grads, g_grads):
        """ Attempt to directly compute hessian and apply equation (6) """
        d_grads = []
        g_grads = []
        d_vars = []
        g_vars = []
        alpha = 0.5
        if self.config.alpha is not None:
            alpha = self.gan.configurable_param(self.config.alpha)
        beta = 0.5
        if self.config.beta is not None:
            beta = self.gan.configurable_param(self.config.beta)

        for grad, var in grads_and_vars:
            if var in self.gan.d_vars():
                d_vars += [var]
                d_grads += [grad]
            elif var in self.gan.g_vars():
                g_vars += [var]
                g_grads += [grad]
            else:
                raise ("Couldn't find var in g_vars or d_vars")
        orig_grads = d_grads + g_grads
        all_vars = d_vars + g_vars

        def curl():
            grads = tf.gradients(self.gan.trainer.d_loss,
                                 d_vars) + tf.gradients(
                                     self.gan.trainer.g_loss, g_vars)
            op3 = tf.group(*[
                tf.assign_sub(v, self._lr_t * grad)
                for grad, v in zip(grads, all_vars)
            ])
            with tf.get_default_graph().control_dependencies([op3]):

                def curlcombine(g1, g2):
                    stepsize = self._lr_t
                    return g1 - (g2 - g1) / stepsize

                new_grads = tf.gradients(self.gan.trainer.d_loss,
                                         d_vars) + tf.gradients(
                                             self.gan.trainer.g_loss, g_vars)
                g3s = [curlcombine(g1, g2) for g1, g2 in zip(grads, new_grads)]
                return g3s

        #gamma12
        if self.config.method == 'curl':
            all_grads = curl()
            d_grads = all_grads[:len(d_vars)]
            g_grads = all_grads[len(d_vars):]

        all_grads = d_grads + g_grads

        with ops.init_scope():
            [
                self._zeros_slot(v, "orig", self._name)
                for _, v in grads_and_vars
            ]

        v1 = [self.get_slot(v, "orig") for v in all_vars]

        restored_vars = all_vars
        tmp_vars = v1

        e1 = 0.0001
        e2 = 0.0001

        #gamma12
        save = tf.group(*[
            tf.assign(w, v)
            for w, v in zip(tmp_vars.copy(), restored_vars.copy())
        ])  # store variables

        with tf.get_default_graph().control_dependencies([save]):
            #opboth = self.optimizer.apply_gradients(grads_and_vars, global_step=global_step, name=name)
            #opdp = self.optimizer.apply_gradients(grads_and_vars[:len(d_vars)], global_step=global_step, name=name)
            #opgp = self.optimizer.apply_gradients(grads_and_vars[len(d_vars):], global_step=global_step, name=name)
            restore = tf.group(*[
                tf.assign(w, v)
                for w, v in zip(restored_vars.copy(), tmp_vars.copy())
            ])  # store variables
            opboth = [
                tf.assign_sub(w, self._lr_t * v)
                for w, v in zip(all_vars.copy(), all_grads.copy())
            ]  # store variables
            with tf.get_default_graph().control_dependencies(
                [tf.group(*opboth)]):
                if self.config.method == "curl":
                    gboth = curl()
                else:
                    gboth = tf.gradients(self.loss[0], d_vars) + tf.gradients(
                        self.loss[1], g_vars)
                with tf.get_default_graph().control_dependencies([restore]):
                    opd = opboth[:len(d_vars)]
                    with tf.get_default_graph().control_dependencies(
                        [tf.group(*opd)]):
                        if self.config.method == "curl":
                            new_d_grads = curl()
                        else:
                            new_d_grads = tf.gradients(
                                self.loss[0], d_vars) + tf.gradients(
                                    self.loss[1], g_vars)
                        with tf.get_default_graph().control_dependencies(
                            [restore]):
                            opg = opboth[len(d_vars):]
                            with tf.get_default_graph().control_dependencies(
                                [tf.group(*opg)]):
                                if self.config.method == "curl":
                                    new_g_grads = curl()
                                else:
                                    new_g_grads = tf.gradients(
                                        self.loss[0], d_vars) + tf.gradients(
                                            self.loss[1], g_vars)
                                with tf.get_default_graph(
                                ).control_dependencies([restore]):
                                    new_grads = []
                                    for _gboth, _gd, _gg, _g, _orig_g in zip(
                                            gboth, new_d_grads, new_g_grads,
                                        (d_grads + g_grads), orig_grads):
                                        a = (_gg - _g) / self._lr_t  # d2f/dx2i
                                        b = (_gboth - _gg) / (
                                            2 * self._lr_t) + (_gd - _g) / (
                                                2 * self._lr_t)  # d2f/dx1dx2
                                        c = (_gboth - _gd) / (
                                            2 * self._lr_t) + (_gg - _g) / (
                                                2 * self._lr_t)  # d2f/dx1dx2
                                        c = -c
                                        d = -(_gd -
                                              _g) / self._lr_t  # d2f/dx2j
                                        if self.config.form == 5:
                                            a = (_gg -
                                                 _g) / self._lr_t  # d2f/dx2i
                                            b = (_gboth - _gg) / (
                                                2 * self._lr_t
                                            ) + (_gd - _g) / (2 * self._lr_t
                                                              )  # d2f/dx1dx2
                                            c = (_gboth - _gd) / (
                                                2 * self._lr_t
                                            ) + (_gg - _g) / (2 * self._lr_t
                                                              )  # d2f/dx1dx2
                                            d = (_gd -
                                                 _g) / self._lr_t  # d2f/dx2j
                                        J = np.array([[a, b], [c, d]])
                                        Jt = np.transpose(J)

                                        det = a * d - b * c + 1e-8
                                        #h_1 = 1.0/det * (b+d-a-c)
                                        h_1_a = d / det
                                        h_1_b = -b / det
                                        h_1_c = -c / det
                                        h_1_d = a / det
                                        Jinv = np.array([[h_1_a, h_1_b],
                                                         [h_1_c, h_1_d]])
                                        _j = Jt[0][0] * Jinv[0][0] * _g + Jt[
                                            1][0] * Jinv[1][0] * _g + Jt[0][
                                                1] * Jinv[0][1] * _g + Jt[1][
                                                    1] * Jinv[1][1] * _g

                                        new_grads.append(alpha * _orig_g +
                                                         beta * _j)

                                    new_grads_and_vars = list(
                                        zip(new_grads, all_vars)).copy()
                                    return self.optimizer.apply_gradients(
                                        new_grads_and_vars,
                                        global_step=global_step,
                                        name=name)
def _zero_debias(unbiased_var, value, decay):
    """Compute the delta required for a debiased Variable.

  All exponential moving averages initialized with Tensors are initialized to 0,
  and therefore are biased to 0. Variables initialized to 0 and used as EMAs are
  similarly biased. This function creates the debias updated amount according to
  a scale factor, as in https://arxiv.org/abs/1412.6980.

  To demonstrate the bias the results from 0-initialization, take an EMA that
  was initialized to `0` with decay `b`. After `t` timesteps of seeing the
  constant `c`, the variable have the following value:

  ```
    EMA = 0*b^(t) + c*(1 - b)*b^(t-1) + c*(1 - b)*b^(t-2) + ...
        = c*(1 - b^t)
  ```

  To have the true value `c`, we would divide by the scale factor `1 - b^t`.

  In order to perform debiasing, we use two shadow variables. One keeps track of
  the biased estimate, and the other keeps track of the number of updates that
  have occurred.

  Args:
    unbiased_var: A Variable representing the current value of the unbiased EMA.
    value: A Tensor representing the most recent value.
    decay: A Tensor representing `1-decay` for the EMA.

  Returns:
    The amount that the unbiased variable should be updated. Computing this
    tensor will also update the shadow variables appropriately.
  """
    with variable_scope.variable_scope(unbiased_var.op.name,
                                       values=[unbiased_var, value,
                                               decay]) as scope:
        with ops.colocate_with(unbiased_var):
            with ops.init_scope():
                biased_initializer = init_ops.zeros_initializer(
                    dtype=unbiased_var.dtype)(unbiased_var.get_shape())
                local_step_initializer = init_ops.zeros_initializer()

            def _maybe_get_unique(name):
                """Get name for a unique variable, if not `reuse=True`."""
                if variable_scope.get_variable_scope().reuse:
                    return name
                vs_vars = [
                    x.op.name for x in
                    variable_scope.get_variable_scope().global_variables()
                ]
                full_name = variable_scope.get_variable_scope(
                ).name + "/" + name
                if full_name not in vs_vars: return name
                idx = 1
                while full_name + ("_%d" % idx) in vs_vars:
                    idx += 1
                return name + ("_%d" % idx)

            biased_var = variable_scope.get_variable(
                _maybe_get_unique("biased"),
                initializer=biased_initializer,
                trainable=False)
            local_step = variable_scope.get_variable(
                _maybe_get_unique("local_step"),
                shape=[],
                dtype=unbiased_var.dtype,
                initializer=local_step_initializer,
                trainable=False)

            # Get an update ops for both shadow variables.
            update_biased = state_ops.assign_sub(biased_var,
                                                 (biased_var - value) * decay,
                                                 name=scope.name)
            update_local_step = local_step.assign_add(1)

            # Compute the value of the delta to update the unbiased EMA. Make sure to
            # use the new values of the biased variable and the local step.
            with ops.control_dependencies([update_biased, update_local_step]):
                # This function gets `1 - decay`, so use `1.0 - decay` in the exponent.
                unbiased_ema_delta = (
                    unbiased_var - biased_var.read_value() /
                    (1 - math_ops.pow(1.0 - decay, local_step.read_value())))

            return unbiased_ema_delta
Example #38
0
def save(obj, export_dir, signatures=None):
    # pylint: disable=line-too-long
    """Exports the Checkpointable object `obj` to [SavedModel format](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md).

  Example usage:

  ```python
  class Adder(tf.train.Checkpoint):

    @tf.function(input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)])
    def add(self, x):
      return x + x + 1.

  to_export = Adder()
  tf.saved_model.save(to_export, '/tmp/adder')
  ```

  The resulting SavedModel is then servable with an input named "x", its value
  having any shape and dtype float32.

  The optional `signatures` argument controls which methods in `obj` will be
  available to programs which consume `SavedModel`s, for example serving
  APIs. Python functions may be decorated with
  `@tf.function(input_signature=...)` and passed as signatures directly, or
  lazily with a call to `get_concrete_function` on the method decorated with
  `@tf.function`.

  If the `signatures` argument is omitted, `obj` will be searched for
  `@tf.function`-decorated methods. If exactly one `@tf.function` is found, that
  method will be used as the default signature for the SavedModel. This behavior
  is expected to change in the future, when a corresponding
  `tf.saved_model.load` symbol is added. At that point signatures will be
  completely optional, and any `@tf.function` attached to `obj` or its
  dependencies will be exported for use with `load`.

  When invoking a signature in an exported SavedModel, `Tensor` arguments are
  identified by name. These names will come from the Python function's argument
  names by default. They may be overridden by specifying a `name=...` argument
  in the corresponding `tf.TensorSpec` object. Explicit naming is required if
  multiple `Tensor`s are passed through a single argument to the Python
  function.

  The outputs of functions used as `signatures` must either be flat lists, in
  which case outputs will be numbered, or a dictionary mapping string keys to
  `Tensor`, in which case the keys will be used to name outputs.

  Since `tf.keras.Model` objects are also Checkpointable, this function can be
  used to export Keras models. For example, exporting with a signature
  specified:

  ```python
  class Model(tf.keras.Model):

    @tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
    def serve(self, serialized):
      ...

  m = Model()
  tf.saved_model.save(m, '/tmp/saved_model/')
  ```

  Exporting from a function without a fixed signature:

  ```python
  class Model(tf.keras.Model):

    @tf.function
    def call(self, x):
      ...

  m = Model()
  tf.saved_model.save(
      m, '/tmp/saved_model/',
      signatures=m.call.get_concrete_function(
          tf.TensorSpec(shape=[None, 3], dtype=tf.float32, name="inp")))
  ```

  `tf.keras.Model` instances constructed from inputs and outputs already have a
  signature and so do not require a `@tf.function` decorator or a `signatures`
  argument. If neither are specified, the model's forward pass is exported.

  ```python
  x = input_layer.Input((4,), name="x")
  y = core.Dense(5, name="out")(x)
  model = training.Model(x, y)
  tf.saved_model.save(model, '/tmp/saved_model/')
  # The exported SavedModel takes "x" with shape [None, 4] and returns "out"
  # with shape [None, 5]
  ```

  Variables must be tracked by assigning them to an attribute of a tracked
  object or to an attribute of `obj` directly. TensorFlow objects (e.g. layers
  from `tf.keras.layers`, optimizers from `tf.train`) track their variables
  automatically. This is the same tracking scheme that `tf.train.Checkpoint`
  uses, and an exported `Checkpoint` object may be restored as a training
  checkpoint by pointing `tf.train.Checkpoint.restore` to the SavedModel's
  "variables/" subdirectory. Currently variables are the only stateful objects
  supported by `tf.saved_model.save`, but others (e.g. tables) will be supported
  in the future.

  `tf.function` does not hard-code device annotations from outside the function
  body, instead using the calling context's device. This means for example that
  exporting a model which runs on a GPU and serving it on a CPU will generally
  work, with some exceptions. `tf.device` annotations inside the body of the
  function will be hard-coded in the exported model; this type of annotation is
  discouraged. Device-specific operations, e.g. with "cuDNN" in the name or with
  device-specific layouts, may cause issues. Currently a `DistributionStrategy`
  is another exception: active distribution strategies will cause device
  placements to be hard-coded in a function. Exporting a single-device
  computation and importing under a `DistributionStrategy` is not currently
  supported, but may be in the future.

  SavedModels exported with `tf.saved_model.save` [strip default-valued
  attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes)
  automatically, which removes one source of incompatibilities when the consumer
  of a SavedModel is running an older TensorFlow version than the
  producer. There are however other sources of incompatibilities which are not
  handled automatically, such as when the exported model contains operations
  which the consumer does not have definitions for.

  The current implementation of `tf.saved_model.save` targets serving use-cases,
  but omits information which will be necessary for the planned future
  implementation of `tf.saved_model.load`. Exported models using the current
  `save` implementation, and other existing SavedModels, will not be compatible
  with `tf.saved_model.load` when it is implemented. Further, `save` will in the
  future attempt to export `@tf.function`-decorated methods which it does not
  currently inspect, so some objects which are exportable today will raise
  exceptions on export in the future (e.g. due to complex/non-serializable
  default arguments). Such backwards-incompatible API changes are expected only
  prior to the TensorFlow 2.0 release.

  Args:
    obj: A checkpointable object to export.
    export_dir: A directory in which to write the SavedModel.
    signatures: Optional, either a `tf.function` with an input signature
      specified or the result of `f.get_concrete_function` on a
      `@tf.function`-decorated function `f`, in which case `f` will be used to
      generate a signature for the SavedModel under the default serving
      signature key. `signatures` may also be a dictionary, in which case it
      maps from signature keys to either `tf.function` instances with input
      signatures or concrete functions. The keys of such a dictionary may be
      arbitrary strings, but will typically be from the
      `tf.saved_model.signature_constants` module.

  Raises:
    ValueError: If `obj` is not checkpointable.

  @compatibility(eager)
  Not supported when graph building. From TensorFlow 1.x,
  `tf.enable_eager_execution()` must run first. May not be called from within a
  function body.
  @end_compatibility
  """
    if not context.executing_eagerly():
        with ops.init_scope():
            if context.executing_eagerly():
                raise AssertionError(
                    "tf.saved_model.save is not supported inside a traced "
                    "@tf.function. Move the call to the outer eagerly-executed "
                    "context.")
            else:
                raise AssertionError(
                    "tf.saved_model.save is not supported when graph building. "
                    "tf.enable_eager_execution() must run first when calling it from "
                    "TensorFlow 1.x.")
    # pylint: enable=line-too-long
    if not isinstance(obj, base.CheckpointableBase):
        raise ValueError(
            "Expected a Checkpointable object for export, got {}.".format(obj))
    if signatures is None:
        # Note that we run this before saving the checkpoint, since looping over
        # attributes may have the side effect of creating variables in some cases.
        signatures = _find_function_to_export(obj)

    signatures = _canonicalize_signatures(signatures)
    # TODO(allenl): Factor out some subset of SavedModelBuilder which is 2.x
    # compatible (no sessions) and share it with this export API rather than
    # making a SavedModel proto and writing it directly.
    saved_model = saved_model_pb2.SavedModel()
    meta_graph_def = saved_model.meta_graphs.add()
    object_saver = util.CheckpointableSaver(obj)
    asset_info = _fill_meta_graph_def(meta_graph_def, obj, signatures,
                                      object_saver)
    saved_model.saved_model_schema_version = (
        constants.SAVED_MODEL_SCHEMA_VERSION)
    # So far we've just been generating protocol buffers with no I/O. Now we write
    # the checkpoint, copy assets into the assets directory, and write out the
    # SavedModel proto itself.
    utils_impl.get_or_create_variables_dir(export_dir)
    object_saver.save(utils_impl.get_variables_path(export_dir))
    builder_impl.copy_assets_to_destination_dir(asset_info.asset_filename_map,
                                                export_dir)
    path = os.path.join(compat.as_bytes(export_dir),
                        compat.as_bytes(constants.SAVED_MODEL_FILENAME_PB))
    file_io.write_string_to_file(path, saved_model.SerializeToString())
    _write_object_graph(obj, export_dir, asset_info.asset_index)
Example #39
0
 def _state_callback_wrapper():
     with ops.init_scope():
         return state_callback()
Example #40
0
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      global_step: Optional `Variable` to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
        # This is a default implementation of apply_gradients() that can be shared
        # by most optimizers.  It relies on the subclass implementing the following
        # methods: _create_slots(), _prepare(), _apply_dense(), and _apply_sparse().

        grads_and_vars = tuple(
            grads_and_vars)  # Make sure repeat iteration works.
        if not grads_and_vars:
            raise ValueError("No variables provided.")
        converted_grads_and_vars = []
        for g, v in grads_and_vars:
            if g is not None:
                try:
                    # Convert the grad to Tensor or IndexedSlices if necessary.
                    g = ops.convert_to_tensor_or_indexed_slices(g)
                except TypeError:
                    raise TypeError("Gradient must be convertible to a Tensor"
                                    " or IndexedSlices, or None: %s" % g)
                if not isinstance(g, (ops.Tensor, ops.IndexedSlices)):
                    raise TypeError(
                        "Gradient must be a Tensor, IndexedSlices, or None: %s"
                        % g)
            p = _get_processor(v)
            converted_grads_and_vars.append((g, v, p))

        converted_grads_and_vars = tuple(converted_grads_and_vars)
        var_list = [v for g, v, _ in converted_grads_and_vars if g is not None]
        if not var_list:
            raise ValueError("No gradients provided for any variable: %s." %
                             ([str(v)
                               for _, _, v in converted_grads_and_vars], ))
        with ops.init_scope():
            self._create_slots([_get_variable_for(v) for v in var_list])
        update_ops = []
        with ops.name_scope(name, self._name) as name:
            self._prepare()
            for grad, var, processor in converted_grads_and_vars:
                if grad is None:
                    continue
                # We colocate all ops created in _apply_dense or _apply_sparse
                # on the same device as the variable.
                # TODO(apassos): figure out how to get the variable name here.
                scope_name = var.op.name if context.in_graph_mode() else ""
                with ops.name_scope("update_" +
                                    scope_name), ops.colocate_with(var):
                    update_ops.append(processor.update_op(self, grad))
            if global_step is None:
                apply_updates = self._finish(update_ops, name)
            else:
                with ops.control_dependencies(
                    [self._finish(update_ops, "update")]):
                    with ops.colocate_with(global_step):
                        if isinstance(global_step,
                                      resource_variable_ops.ResourceVariable):
                            # TODO(apassos): the implicit read in assign_add is slow; consider
                            # making it less so.
                            apply_updates = resource_variable_ops.assign_add_variable_op(
                                global_step.handle,
                                ops.convert_to_tensor(1,
                                                      dtype=global_step.dtype),
                                name=name)
                        else:
                            apply_updates = state_ops.assign_add(global_step,
                                                                 1,
                                                                 name=name)

            if context.in_graph_mode():
                if isinstance(apply_updates, ops.Tensor):
                    apply_updates = apply_updates.op
                train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
                if apply_updates not in train_op:
                    train_op.append(apply_updates)

            return apply_updates
Example #41
0
def func_graph_from_py_func(name,
                            python_func,
                            args,
                            kwargs,
                            signature=None,
                            func_graph=None,
                            autograph=False,
                            autograph_options=None,
                            add_control_dependencies=True,
                            arg_names=None,
                            op_return_value=None,
                            collections=None,
                            capture_by_value=None,
                            override_flat_arg_shapes=None):
  """Returns a `FuncGraph` generated from `python_func`.

  Args:
    name: an identifier for the function.
    python_func: the Python function to trace.
    args: the positional args with which the Python function should be called;
      ignored if a signature is provided.
    kwargs: the keyword args with which the Python function should be called;
      ignored if a signature is provided.
    signature: a possibly nested sequence of `TensorSpecs` specifying the shapes
      and dtypes of the arguments. When a signature is provided, `args` and
      `kwargs` are ignored, and `python_func` is traced with Tensors conforming
      to `signature`. If `None`, the shapes and dtypes are inferred from the
      inputs.
    func_graph: Optional. An instance of FuncGraph. If provided, we will use
      this graph else a new one is built and returned.
    autograph: whether to use autograph to compile `python_func`.
      See https://www.tensorflow.org/guide/autograph for more information.
    autograph_options: additional knobs to control when `autograph=True`.
      See https://www.tensorflow.org/guide/autograph for more information.
    add_control_dependencies: If True, automatically adds control dependencies
      to ensure program order matches execution order and stateful ops always
      execute.
    arg_names: Optional list of argument names, used to give input placeholders
      recognizable names.
    op_return_value: Optional. A Tensor. If set and `python_func` returns
      Operations, those return values will be replaced with this value. If not
      set, returning an Operation triggers an error.
    collections: a dictionary of collections this FuncGraph should start
      with. If not specified (None), the FuncGraph will read (but not write to)
      the outer graph's collections that are not whitelisted, and both
      read and write to the outer graph's collections that are whitelisted.
      The current whitelisted collections are the global variables, the
      local variables, and the trainable variables.
      Defaults to None.
    capture_by_value: An optional boolean. If True, the func graph will capture
      Variables by value instead of reference. By default inherit from outer
      graphs, and failing that will default to False.
    override_flat_arg_shapes: An optional list of instances that are either
      `None` or `TensorShape`.  The length must match that of
      `nest.flatten((args, kwargs))`.  The entries containing value `None`
      must match entries in flattened arguments containing non-tensors, while
      entries containing a `TensorShape` must match entries in the flattened
      arguments containing tensors.

  Returns:
    A FuncGraph.

  Raises:
    TypeError: If any of `python_func`'s return values is neither `None` nor a
      `Tensor`.
    ValueError: If both `signature` and `override_flat_arg_shapes` are
      passed in.
  """
  if op_return_value is not None:
    assert isinstance(op_return_value, ops.Tensor), op_return_value
  if func_graph is None:
    func_graph = FuncGraph(name, collections=collections,
                           capture_by_value=capture_by_value)
  assert isinstance(func_graph, FuncGraph)
  if add_control_dependencies:
    control_manager = AutomaticControlDependencies()
  else:
    control_manager = ops.NullContextmanager()
  with func_graph.as_default(), control_manager as a:
    current_scope = variable_scope.get_variable_scope()
    default_use_recource = current_scope.use_resource
    current_scope.set_use_resource(True)

    if signature is not None and override_flat_arg_shapes is not None:
      raise ValueError(
          "Passed both signature and override_flat_arg_shapes: %s and %s."
          % (signature, override_flat_arg_shapes))

    if signature is not None:
      args = signature
      kwargs = {}

    # Creates and names placeholders for all arguments.
    if override_flat_arg_shapes is not None:
      flat_args = nest.flatten(args)
      arg_shapes = override_flat_arg_shapes[:len(flat_args)]
      kwarg_shapes = override_flat_arg_shapes[len(flat_args):]
    else:
      arg_shapes = None
      kwarg_shapes = None
    func_args = _get_defun_inputs_from_args(
        args, arg_names, flat_shapes=arg_shapes)
    func_kwargs = _get_defun_inputs_from_kwargs(
        kwargs, flat_shapes=kwarg_shapes)

    # Convert all Tensors into TensorSpecs before saving the structured inputs.
    # If storing pure concrete functions that are not called through polymorphic
    # functions, we don't have access to FunctionSpec, so we need to call the
    # TensorSpecs by their `arg_names` for later binding.
    func_graph.structured_input_signature = (
        convert_structure_to_signature(func_args, arg_names),
        convert_structure_to_signature(func_kwargs))

    flat_func_args = nest.flatten(func_args)
    flat_func_kwargs = nest.flatten(func_kwargs)
    # Temporarily set inputs to allow graph building code to inspect
    # them. Reassigned below.
    func_graph.inputs = [arg for arg in flat_func_args + flat_func_kwargs
                         if isinstance(arg, ops.Tensor)]

    # Note: `nest.flatten` sorts by keys, as does `_deterministic_dict_values`.
    # Variables to help check whether mutation happens in calling the function
    # Copy the recursive list, tuple and map structure, but not base objects
    func_args_before = nest.pack_sequence_as(func_args, flat_func_args)
    func_kwargs_before = nest.pack_sequence_as(
        func_kwargs, flat_func_kwargs)

    def convert(x):
      """Converts a function output to a Tensor."""
      if x is None:
        return None
      if op_return_value is not None and isinstance(x, ops.Operation):
        # TODO(b/79881896): we currently can't capture external control deps, so
        # this won't work if x needs to be captured (i.e. if python_func returns
        # captured Operations).
        with ops.control_dependencies([x]):
          x = array_ops.identity(op_return_value)
      elif not isinstance(x, tensor_array_ops.TensorArray):
        try:
          x = ops.convert_to_tensor_or_composite(x)
        except (ValueError, TypeError):
          raise TypeError(
              "To be compatible with tf.contrib.eager.defun, Python functions "
              "must return zero or more Tensors; in compilation of %s, found "
              "return value of type %s, which is not a Tensor." %
              (str(python_func), type(x)))
      if add_control_dependencies:
        x = a.mark_as_return(x)
      return x

    this_tape = tape.push_new_tape()
    try:
      if autograph:
        from tensorflow.python import autograph  # pylint: disable=g-import-not-at-top
        _, original_func = tf_decorator.unwrap(python_func)

        def wrapper(*args, **kwargs):
          # Note: functions annotated with @tf.function should always be
          # converted even though they would meet autograph's whitelisting
          # criteria.
          # If this assumption is ever broken, converted_call will need to
          # handle the possibility of original_func still being a shim, e.g.
          # bound to WeakrefSelf.
          return autograph.converted_call(
              original_func, None,
              autograph.ConversionOptions(
                  verbose=autograph.Verbosity.BRIEF,
                  recursive=True,
                  strip_decorators=(def_function.function,),
                  optional_features=autograph_options,
                  force_conversion=True,
              ), args, kwargs)

        # Wrapping around a decorator allows checks like tf_inspect.getargspec
        # to be accurate.
        converted_func = tf_decorator.make_decorator(original_func, wrapper)
        python_func = tf_decorator.rewrap(python_func, original_func,
                                          converted_func)

      func_outputs = python_func(*func_args, **func_kwargs)

      # invariant: `func_outputs` contains only Tensors, IndexedSlices,
      # SparseTensors, TensorArrays and `None`s.
      func_outputs = nest.map_structure(convert, func_outputs)

      check_mutation(func_args_before, func_args)
      check_mutation(func_kwargs_before, func_kwargs)
    finally:
      tape.pop_tape(this_tape)
      current_scope.set_use_resource(default_use_recource)

    # Variables in `func_args`, `func_kwargs` should be explicit inputs
    # to the function, not captured inputs.
    tape_variables = this_tape.watched_variables()
    arg_variables = set()
    inputs = []
    for arg in nest.flatten(func_args) + nest.flatten(func_kwargs):
      if isinstance(arg, resource_variable_ops.ResourceVariable):
        # Even if an argument variable was not used in the function, we've
        # already manually captured the resource Tensor when creating argument
        # placeholders.
        resource_placeholder = func_graph.captures.pop(arg.handle, None)
        if resource_placeholder is None:
          continue
        arg_variables.add(arg)
        inputs.append(resource_placeholder)
      elif isinstance(arg, ops.Tensor):
        inputs.append(arg)
    variables = [v for v in tape_variables if v not in arg_variables]
    func_graph.inputs = inputs + list(func_graph.captures.values())

    func_graph.structured_outputs = func_outputs
    # Returning a closed-over tensor does not trigger convert_to_tensor.
    func_graph.outputs.extend(
        func_graph.capture(x)
        for x in flatten(func_graph.structured_outputs)
        if x is not None)

    func_graph.variables = variables

  if add_control_dependencies:
    func_graph.control_outputs.extend(control_manager.ops_which_must_run)

# Register any other functions defined in the graph.
  with ops.init_scope():
    if context.executing_eagerly():
      for f in func_graph._functions.values():  # pylint: disable=protected-access
        # TODO(ashankar): What about the gradient registry?
        context.add_function(f._c_func.func)  # pylint: disable=protected-access

  return func_graph
Example #42
0
    def _init_from_args(self,
                        initial_value=None,
                        trainable=True,
                        collections=None,
                        validate_shape=True,
                        caching_device=None,
                        name=None,
                        dtype=None,
                        constraint=None):
        """Creates a variable.

    Args:
      initial_value: A `Tensor`, or Python object convertible to a `Tensor`,
        which is the initial value for the Variable. The initial value must have
        a shape specified unless `validate_shape` is set to False. Can also be a
        callable with no argument that returns the initial value when called.
        (Note that initializer functions from init_ops.py must first be bound
         to a shape before being used here.)
      trainable: If `True`, the default, also adds the variable to the graph
        collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as
        the default list of variables to use by the `Optimizer` classes.
      collections: List of graph collections keys. The new variable is added to
        these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
      validate_shape: Ignored. Provided for compatibility with tf.Variable.
      caching_device: Optional device string or function describing where the
        Variable should be cached for reading.  Defaults to the Variable's
        device.  If not `None`, caches on another device.  Typical use is to
        cache on the device where the Ops using the Variable reside, to
        deduplicate copying through `Switch` and other conditional statements.
      name: Optional name for the variable. Defaults to `'Variable'` and gets
        uniquified automatically.
      dtype: If set, initial_value will be converted to the given type.
        If None, either the datatype will be kept (if initial_value is
       a Tensor) or float32 will be used (if it is a Python object convertible
       to a Tensor).
      constraint: An optional projection function to be applied to the variable
        after being updated by an `Optimizer` (e.g. used to implement norm
        constraints or value constraints for layer weights). The function must
        take as input the unprojected Tensor representing the value of the
        variable and return the Tensor for the projected value
        (which must have the same shape). Constraints are not safe to
        use when doing asynchronous distributed training.

    Raises:
      ValueError: If the initial value is not specified, or does not have a
        shape and `validate_shape` is `True`.

    @compatibility(eager)
    When Eager Execution is enabled, variables are never added to collections.
    It is not implicitly added to the `GLOBAL_VARIABLES` or
    `TRAINABLE_VARIABLES` collections, and the `collections` argument is
    ignored.
    @end_compatibility
    """
        if initial_value is None:
            raise ValueError("initial_value must be specified.")
        init_from_fn = callable(initial_value)

        if collections is None:
            collections = [ops.GraphKeys.GLOBAL_VARIABLES]
        if not isinstance(collections, (list, tuple, set)):
            raise ValueError(
                "collections argument to Variable constructor must be a list, tuple, "
                "or set. Got %s of type %s" % (collections, type(collections)))
        if constraint is not None and not callable(constraint):
            raise ValueError("The `constraint` argument must be a callable.")

        if isinstance(initial_value, checkpointable.CheckpointInitialValue):
            self._maybe_initialize_checkpointable()
            self._update_uid = initial_value.checkpoint_position.restore_uid
            initial_value = initial_value.wrapped_value

        self._trainable = trainable
        if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections:
            collections = list(collections) + [
                ops.GraphKeys.TRAINABLE_VARIABLES
            ]
        self._save_slice_info = None
        # Store the graph key so optimizers know how to only retrieve variables from
        # this graph.
        self._graph_key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
        with ops.init_scope():
            self._in_graph_mode = not context.executing_eagerly()
            with ops.name_scope(
                    name, "Variable",
                [] if init_from_fn else [initial_value]) as name:
                # pylint: disable=protected-access
                handle_name = ops._name_from_scope_name(name)
                if self._in_graph_mode:
                    shared_name = handle_name
                else:
                    # When in eager mode use a uid for the shared_name, to prevent
                    # accidental sharing.
                    shared_name = "%s_%d" % (handle_name, ops.uid())
                if init_from_fn:
                    # Use attr_scope and device(None) to simulate the behavior of
                    # colocate_with when the variable we want to colocate with doesn't
                    # yet exist.
                    if self._in_graph_mode:
                        attr = attr_value_pb2.AttrValue(
                            list=attr_value_pb2.AttrValue.ListValue(
                                s=[compat.as_bytes("loc:@%s" % handle_name)]))
                        with ops.get_default_graph()._attr_scope(
                            {"_class": attr}):
                            with ops.name_scope("Initializer"), ops.device(
                                    None):
                                initial_value = ops.convert_to_tensor(
                                    initial_value(),
                                    name="initial_value",
                                    dtype=dtype)
                            self._handle = _eager_safe_variable_handle(
                                shape=initial_value.get_shape(),
                                dtype=initial_value.dtype.base_dtype,
                                shared_name=shared_name,
                                name=name,
                                graph_mode=self._in_graph_mode)
                            self._shape = initial_value.get_shape()
                    else:
                        initial_value = initial_value()
                        with ops.name_scope("Initializer"):
                            initial_value = ops.convert_to_tensor(
                                initial_value,
                                name="initial_value",
                                dtype=dtype)
                        self._handle = _eager_safe_variable_handle(
                            shape=initial_value.get_shape(),
                            dtype=initial_value.dtype.base_dtype,
                            shared_name=shared_name,
                            name=name,
                            graph_mode=False)
                        self._shape = initial_value.get_shape()
                # pylint: enable=protected-access

                # Or get the initial value from a Tensor or Python object.
                else:
                    with ops.name_scope("Initializer"):
                        initial_value = ops.convert_to_tensor(
                            initial_value, name="initial_value", dtype=dtype)
                    # pylint: disable=protected-access
                    if (self._in_graph_mode and initial_value is not None
                            and initial_value.op._get_control_flow_context()
                            is not None):
                        raise ValueError(
                            "Initializer for variable %s is from inside a control-flow "
                            "construct, such as a loop or conditional. When creating a "
                            "variable inside a loop or conditional, use a lambda as the "
                            "initializer." % name)
                    # pylint: enable=protected-access
                    self._handle = _eager_safe_variable_handle(
                        shape=initial_value.get_shape(),
                        dtype=initial_value.dtype.base_dtype,
                        shared_name=shared_name,
                        name=name,
                        graph_mode=self._in_graph_mode)
                    self._shape = initial_value.get_shape()

                self._unique_id = shared_name
                self._initial_value = initial_value if self._in_graph_mode else None
                self._handle_name = handle_name + ":0"
                self._dtype = initial_value.dtype.base_dtype
                self._constraint = constraint

                if self._in_graph_mode:
                    with ops.name_scope("IsInitialized"):
                        self._is_initialized_op = (
                            gen_resource_variable_ops.var_is_initialized_op(
                                self._handle))
                    if initial_value is not None:
                        with ops.name_scope("Assign") as n, ops.colocate_with(
                                self._handle):
                            self._initializer_op = (
                                gen_resource_variable_ops.assign_variable_op(
                                    self._handle,
                                    self.
                                    _try_guard_against_uninitialized_dependencies(
                                        initial_value),
                                    name=n))
                    with ops.name_scope("Read"), ops.colocate_with(
                            self._handle):
                        # Manually assign reads to the handle's device to avoid log
                        # messages.
                        with ops.device(self._handle.device):
                            value = self._read_variable_op()
                        self._graph_element = value
                        if caching_device is not None:
                            # Variables may be created in a tf.device() or ops.colocate_with()
                            # context. At the same time, users would expect caching device to
                            # be independent of this context, and/or would not expect the
                            # current device context to be merged with the caching device
                            # spec.  Therefore we reset the colocation stack before creating
                            # the cached value. Note that resetting the colocation stack will
                            # also reset the device stack.
                            with ops.colocate_with(None, ignore_existing=True):
                                with ops.device(caching_device):
                                    self._cached_value = array_ops.identity(
                                        value)
                        else:
                            self._cached_value = None
                else:
                    gen_resource_variable_ops.assign_variable_op(
                        self._handle, initial_value)
                    self._is_initialized_op = None
                    self._initializer_op = None
                    self._graph_element = None
                    if caching_device:
                        with ops.device(caching_device):
                            self._cached_value = self._read_variable_op()
                    else:
                        self._cached_value = None
                if not context.executing_eagerly():
                    ops.add_to_collections(collections, self)
                elif ops.GraphKeys.GLOBAL_STEP in collections:
                    ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self)

        if not self._in_graph_mode:
            # After the handle has been created, set up a way to clean it up when
            # executing eagerly. We'll hold the only reference to the deleter, so that
            # when this object is garbage collected the deleter will be too. This
            # means ResourceVariables can be part of reference cycles without those
            # cycles being uncollectable, and means that no __del__ will be defined at
            # all in graph mode.
            self._handle_deleter = EagerResourceDeleter(
                handle=self._handle, handle_device=self._handle.device)
        self._cached_shape_as_list = None
Example #43
0
 def iterations(self):
     """The number of gradient updates the optimizer has completed so far."""
     with ops.init_scope():
         graph = None if context.executing_eagerly(
         ) else ops.get_default_graph()
         return self._get_non_slot_variable('iterations', graph=graph)
    def apply(self, var_list=None):
        """Maintains moving averages of variables.

    `var_list` must be a list of `Variable` or `Tensor` objects.  This method
    creates shadow variables for all elements of `var_list`.  Shadow variables
    for `Variable` objects are initialized to the variable's initial value.
    They will be added to the `GraphKeys.MOVING_AVERAGE_VARIABLES` collection.
    For `Tensor` objects, the shadow variables are initialized to 0 and zero
    debiased (see docstring in `assign_moving_average` for more details).

    shadow variables are created with `trainable=False` and added to the
    `GraphKeys.ALL_VARIABLES` collection.  They will be returned by calls to
    `tf.global_variables()`.

    Returns an op that updates all shadow variables as described above.

    Note that `apply()` can be called multiple times with different lists of
    variables.

    Args:
      var_list: A list of Variable or Tensor objects. The variables
        and Tensors must be of types float16, float32, or float64.

    Returns:
      An Operation that updates the moving averages.

    Raises:
      TypeError: If the arguments are not all float16, float32, or float64.
      ValueError: If the moving average of one of the variables is already
        being computed.
    """
        # TODO(touts): op_scope
        if var_list is None:
            var_list = variables.trainable_variables()
        zero_debias_true = set()  # set of vars to set `zero_debias=True`
        for var in var_list:
            if var.dtype.base_dtype not in [
                    dtypes.float16, dtypes.float32, dtypes.float64
            ]:
                raise TypeError(
                    "The variables must be half, float, or double: %s" %
                    var.name)
            if var in self._averages:
                raise ValueError("Moving average already computed for: %s" %
                                 var.name)

            # For variables: to lower communication bandwidth across devices we keep
            # the moving averages on the same device as the variables. For other
            # tensors, we rely on the existing device allocation mechanism.
            with ops.init_scope():
                if isinstance(var, variables.Variable):
                    avg = slot_creator.create_slot(var,
                                                   var.initialized_value(),
                                                   self._name,
                                                   colocate_with_primary=True)
                    # NOTE(mrry): We only add `tf.Variable` objects to the
                    # `MOVING_AVERAGE_VARIABLES` collection.
                    ops.add_to_collection(
                        ops.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
                else:
                    avg = slot_creator.create_zeros_slot(
                        var,
                        self._name,
                        colocate_with_primary=(var.op.type in [
                            "Variable", "VariableV2", "VarHandleOp"
                        ]))
                    if self._zero_debias:
                        zero_debias_true.add(avg)
            self._averages[var] = avg

        with ops.name_scope(self._name) as scope:
            decay = ops.convert_to_tensor(self._decay, name="decay")
            if self._num_updates is not None:
                num_updates = math_ops.cast(self._num_updates,
                                            dtypes.float32,
                                            name="num_updates")
                decay = math_ops.minimum(decay, (1.0 + num_updates) /
                                         (10.0 + num_updates))
            updates = []
            for var in var_list:
                zero_debias = self._averages[var] in zero_debias_true
                updates.append(
                    assign_moving_average(self._averages[var],
                                          var,
                                          decay,
                                          zero_debias=zero_debias))
            return control_flow_ops.group(*updates, name=scope)
Example #45
0
    def apply_gradients(self, grads_and_vars, name=None):
        """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        `compute_gradients()`.
      name: Optional name for the returned operation.  Default to the name
        passed to the `Optimizer` constructor.

    Returns:
      An `Operation` that applies the specified gradients. If `global_step`
      was not None, that operation also increments `global_step`.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
        grads_and_vars = _filter_grads(grads_and_vars)
        var_list = [v for (_, v) in grads_and_vars]
        if distribute_ctx.has_strategy():
            reduced_grads = merge_grads(grads_and_vars)
            grads_and_vars = zip(reduced_grads, var_list)

        self._create_hypers()
        with ops.init_scope():
            self._create_slots(var_list)
        update_ops = []

        self._prepare(var_list)

        def update_grad_to_var(grad, var):
            """Apply gradient to variable."""
            if isinstance(var, ops.Tensor):
                raise NotImplementedError("Trying to update a Tensor ", var)
            if isinstance(grad, ops.IndexedSlices):
                if var.constraint is not None:
                    raise RuntimeError(
                        "Cannot use a constraint function on a sparse variable."
                    )
                return self._resource_apply_sparse_duplicate_indices(
                    grad.values, var, grad.indices)
            update_op = self._resource_apply_dense(grad, var)
            if var.constraint is not None:
                with ops.control_dependencies([update_op]):
                    return var.assign(var.constraint(var))
            else:
                return update_op

        with ops.name_scope(name, self._name) as name:
            for grad, var in grads_and_vars:
                scope_name = ("" if ops.executing_eagerly_outside_functions()
                              else "_" + var.op.name)
                with ops.name_scope("update" + scope_name):
                    update_ops.append(update_grad_to_var(grad, var))
            # control dependencies does not work in per replica mode, please change
            # this once b/118841692 is fixed.
            # with ops.control_dependencies(update_ops):
            #   apply_updates = self._iterations.assign_add(1).op
            apply_updates = merge_update_step(update_ops, self.iterations)
            return apply_updates
Example #46
0
        def failing_function():
            a = constant_op.constant(1.)

            with ops.init_scope():
                _ = a + a
Example #47
0
def get_global_generator():
    global global_generator
    if global_generator is None:
        with ops.init_scope():
            global_generator = Generator.from_non_deterministic_state()
    return global_generator
Example #48
0
 def create_variable():
     with ops.init_scope():
         if not a:
             a.append(variables.Variable(initial_value_fn))
Example #49
0
 def _build_graph_network_for_inferred_shape(self,
                                             input_shape,
                                             input_dtype=None):
     if input_shape is None or not self.layers:
         return
     if not tf2.enabled() or not ops.executing_eagerly_outside_functions():
         # This behavior is disabled in V1 or when eager execution is disabled.
         return
     if (not self._has_explicit_input_shape
             and not self._use_legacy_deferred_behavior):
         # Determine whether the input shape is novel, i.e. whether the model
         # should be rebuilt.
         input_shape = tuple(input_shape)
         if self._inferred_input_shape is None:
             new_shape = input_shape
         else:
             new_shape = relax_input_shape(self._inferred_input_shape,
                                           input_shape)
         if (new_shape is not None
                 and new_shape != self._inferred_input_shape):
             # A novel shape has been received: we need to rebuild the model.
             # In case we are inside a graph function, we step out of it.
             with ops.init_scope():
                 inputs = input_layer.Input(batch_shape=new_shape,
                                            dtype=input_dtype,
                                            name=self.layers[0].name +
                                            '_input')
                 layer_input = inputs
                 created_nodes = set()
                 for layer in self.layers:
                     # Clear nodes previously created via this method. This prevents
                     # node accumulation and ensures that e.g. `layer.output` is
                     # always connected to `model.inputs`
                     # (this is important e.g. for the feature extraction use case).
                     # We don't just do `layer._inbound_nodes = []` in order
                     # not to break shared layers added to Sequential models (which is
                     # technically illegal as per the `add()` docstring,
                     # but wasn't previously disabled).
                     clear_previously_created_nodes(layer,
                                                    self._created_nodes)
                     try:
                         # Create Functional API connection by calling the current layer
                         layer_output = layer(layer_input)
                     except:  # pylint:disable=bare-except
                         # Functional API calls may fail for a number of reasons:
                         # 1) The layer may be buggy. In this case it will be easier for
                         # the user to debug if we fail on the first call on concrete data,
                         # instead of our own call on a symbolic input.
                         # 2) The layer is dynamic (graph-incompatible) and hasn't
                         # overridden `compute_output_shape`. In this case, it is
                         # impossible to build a graph network.
                         # 3) The layer is otherwise incompatible with the Functional API
                         # (e.g. this is the case for some probabilistic layers that rely
                         # on hacks and that do not return tensors).
                         # In all these cases, we should avoid creating a graph network
                         # (or we simply can't).
                         self._use_legacy_deferred_behavior = True
                         return
                     if len(nest.flatten(layer_output)) != 1:
                         raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
                     # Keep track of nodes just created above
                     track_nodes_created_by_last_call(layer, created_nodes)
                     layer_input = layer_output
                     outputs = layer_output
                 self._created_nodes = created_nodes
                 try:
                     # Initialize a graph Network. This call will never fail for
                     # a stack of valid Keras layers.
                     # However some users have layers that are fundamentally incompatible
                     # with the Functional API, which do not return tensors. In this
                     # case, we fall back to the legacy deferred behavior.
                     # TODO(fchollet): consider raising here, as we should not be
                     # supporting such layers.
                     self._init_graph_network(inputs, outputs)
                     self._graph_initialized = True
                 except:  # pylint:disable=bare-except
                     self._use_legacy_deferred_behavior = True
             self._inferred_input_shape = new_shape
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        var_list = [v for _, v in grads_and_vars]
        d_vars = []
        g_vars = []
        all_grads = [g for g, _ in grads_and_vars]
        for grad, var in grads_and_vars:
            if var in self.gan.d_vars():
                d_vars += [var]
            elif var in self.gan.g_vars():
                g_vars += [var]
            else:
                raise ("Couldn't find var in g_vars or d_vars")

        with ops.init_scope():
            f1 = [self._zeros_slot(v, "f", self._name) for v in var_list]

        with ops.init_scope():
            v1 = [
                self._get_or_make_slot(v, v, "v1", self._name)
                for v in var_list
            ]
            if self.config.include_slots:
                for name in self.optimizer.get_slot_names():
                    for var in self.optimizer.variables():
                        self._zeros_slot(var, "pm", "pm")
        self._prepare()

        f1 = [self.get_slot(v, "f") for v in var_list]
        v1 = [self.get_slot(v, "v1") for v in var_list]
        slots_list = []
        slots_vars = []
        if self.config.include_slots:
            for name in self.optimizer.get_slot_names():
                for var in self.optimizer.variables():
                    slots_vars += [var]
                    slots_list.append(self._zeros_slot(var, "pm", "pm"))

        current_vars = var_list + slots_vars
        tmp_vars = v1 + slots_list

        diff = [tf.square(v - t) for v, t in zip(current_vars, tmp_vars)]

        if isinstance(self.loss, list):
            grads = tf.gradients(self.loss[0], d_vars) + tf.gradients(
                self.loss[1], g_vars)
        else:
            grads = tf.gradients(self.loss, current_vars)
        f_accum = [(self.config.f_decay or 0.95) * f + tf.square(g)
                   for f, g in zip(f1, grads)]
        self.gan.add_metric('f1',
                            tf.reduce_sum([tf.reduce_sum(f) for f in f1]))

        reg = [tf.multiply(f, d) for f, d in zip(f1, diff)]
        ewc_loss = (self.config.lam or 17.5) / 2.0 * tf.reduce_sum(
            [tf.reduce_sum(r) for r in reg])
        self.gan.add_metric('ewc', ewc_loss)

        save_weights = tf.group(*[
            tf.assign(w, v) for w, v in zip(tmp_vars, current_vars)
        ])  # store variables

        if isinstance(self.loss, list):
            new_grads = tf.gradients(self.loss[0] + ewc_loss,
                                     d_vars) + tf.gradients(
                                         self.loss[1] + ewc_loss, g_vars)
        else:
            new_grads = tf.gradients(self.loss + ewc_loss, current_vars)
        step = self.optimizer.apply_gradients(list(zip(new_grads,
                                                       current_vars)).copy(),
                                              global_step=global_step,
                                              name=name)

        store_f = tf.group(*[tf.assign(w, v) for w, v in zip(f1, f_accum)])
        with tf.get_default_graph().control_dependencies([store_f]):
            with tf.get_default_graph().control_dependencies([step]):
                with tf.get_default_graph().control_dependencies(
                    [save_weights]):
                    return tf.no_op()
Example #51
0
    def __call__(self, *args, **kwds):
        """Calls the graph function."""
        if self._created_variables:
            # In this case we have created variables on the first call, so we run the
            # defunned version which is guaranteed to never create variables.
            return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
        elif self._stateful_fn is not None:
            # In this case we have not created variables on the first call. So we can
            # run the first trace but we should fail if variables are created.
            results = self._stateful_fn(*args, **kwds)
            if self._created_variables:
                raise ValueError(
                    "Creating variables on a non-first call to a function"
                    " decorated with tf.function.")
            return results

        # This is the first call of __call__, so we have to initialize.
        self._initialize(args, kwds)
        if self._lifted_all_initializers and self._lifted_placeholders:
            with ops.init_scope():
                handles, placeholders = zip(*self._lifted_placeholders)
                if context.executing_eagerly():
                    lifted_fn = function_lib._EagerDefinedFunction(  # pylint: disable=protected-access
                        "initializer" + str(ops.uid()),
                        self._lifted_initializer_graph, placeholders, [], {})
                    with tape.stop_recording():
                        lifted_fn.call(context.context(), list(handles))
            return self._stateless_fn(*args, **kwds)
        canon_args, canon_kwds = self._canonicalize_function_inputs(args, kwds)

        if not self._created_variables:
            # If we did not create any variables the trace we have is good enough.
            return self._concrete_stateful_fn._filtered_call(
                canon_args, canon_kwds)  # pylint: disable=protected-access

        def fn_with_cond(*inner_args, **inner_kwds):
            """Conditionally runs initialization if it's needed."""
            condition = True
            for wr in self._created_variables:
                variable = wr()
                if variable is None:
                    raise ValueError(
                        "A tf.Variable created inside your tf.function has been"
                        " garbage-collected. Your code needs to keep Python references"
                        " to variables created inside `tf.function`s.\n"
                        "\n"
                        "A common way to raise this error is to create and return a"
                        " variable only referenced inside your function:\n"
                        "\n"
                        "@tf.function\n"
                        "def f():\n"
                        "  v = tf.Variable(1.0)\n"
                        "  return v\n"
                        "\n"
                        "v = f()  # Crashes with this error message!\n"
                        "\n"
                        "The reason this crashes is that @tf.function annotated"
                        " function returns a **`tf.Tensor`** with the **value** of the"
                        " variable when the function is called rather than the"
                        " variable instance itself. As such there is no code holding a"
                        " reference to the `v` created inside the function and Python"
                        " garbage collects it.\n"
                        "\n"
                        "The simplest way to fix this issue is to create variables"
                        " outside the function and capture them:\n"
                        "\n"
                        "v = tf.Variable(1.0)\n"
                        "\n"
                        "@tf.function\n"
                        "def f():\n"
                        "  return v\n"
                        "\n"
                        "f()  # <tf.Tensor: ... numpy=1.>\n"
                        "v.assign_add(1.)\n"
                        "f()  # <tf.Tensor: ... numpy=2.>")
                condition = math_ops.logical_and(
                    condition,
                    resource_variable_ops.var_is_initialized_op(
                        variable.handle))
            # We want to call stateless_fn if possible because it avoids recomputing
            # potentially expensive initializers.
            return control_flow_ops.cond(
                condition,
                lambda: self._stateless_fn(*inner_args, **inner_kwds),
                functools.partial(
                    self._concrete_stateful_fn._filtered_call,  # pylint: disable=protected-access
                    inner_args,
                    inner_kwds))

        return function_lib.defun(fn_with_cond)(*canon_args, **canon_kwds)
Example #52
0
    def _init_from_args(
        self,
        initial_value=None,
        trainable=None,
        collections=None,
        caching_device=None,
        name=None,
        dtype=None,
        constraint=None,
        synchronization=None,
        aggregation=None,
        distribute_strategy=None,
        shape=None,
    ):
        """Creates a variable.

        Args:
          initial_value: A `Tensor`, or Python object convertible to a `Tensor`,
            which is the initial value for the Variable. The initial value must have
            a shape specified unless `validate_shape` is set to False. Can also be a
            callable with no argument that returns the initial value when called.
            (Note that initializer functions from init_ops.py must first be bound
             to a shape before being used here.)
          trainable: If `True`, the default, also adds the variable to the graph
            collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as
            the default list of variables to use by the `Optimizer` classes.
            Defaults to `True`, unless `synchronization` is set to `ON_READ`, in
            which case it defaults to `False`.
          collections: List of graph collections keys. The new variable is added to
            these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`.
          caching_device: Optional device string or function describing where the
            Variable should be cached for reading.  Defaults to the Variable's
            device.  If not `None`, caches on another device.  Typical use is to
            cache on the device where the Ops using the Variable reside, to
            deduplicate copying through `Switch` and other conditional statements.
          name: Optional name for the variable. Defaults to `'Variable'` and gets
            uniquified automatically.
          dtype: If set, initial_value will be converted to the given type.
            If None, either the datatype will be kept (if initial_value is
           a Tensor) or float32 will be used (if it is a Python object convertible
           to a Tensor).
          constraint: An optional projection function to be applied to the variable
            after being updated by an `Optimizer` (e.g. used to implement norm
            constraints or value constraints for layer weights). The function must
            take as input the unprojected Tensor representing the value of the
            variable and return the Tensor for the projected value
            (which must have the same shape). Constraints are not safe to
            use when doing asynchronous distributed training.
          synchronization: Indicates when a distributed a variable will be
            aggregated. Accepted values are constants defined in the class
            `tf.VariableSynchronization`. By default the synchronization is set to
            `AUTO` and the current `DistributionStrategy` chooses
            when to synchronize.
          aggregation: Indicates how a distributed variable will be aggregated.
            Accepted values are constants defined in the class
            `tf.VariableAggregation`.
          distribute_strategy: DistributionStrategy under which this variable
            was created.
          shape: (optional) The shape of this variable. If None, the shape of
            `initial_value` will be used. When setting this argument to
            `tf.TensorShape(None)` (representing an unspecified shape), the variable
            can be assigned with values of different shapes.

        Raises:
          ValueError: If the initial value is not specified, or does not have a
            shape and `validate_shape` is `True`.

        @compatibility(eager)
        When Eager Execution is enabled, variables are never added to collections.
        It is not implicitly added to the `GLOBAL_VARIABLES` or
        `TRAINABLE_VARIABLES` collections, and the `collections` argument is
        ignored.
        @end_compatibility
        """
        (
            synchronization,
            aggregation,
            trainable,
        ) = variables.validate_synchronization_aggregation_trainable(
            synchronization, aggregation, trainable, name)
        if initial_value is None:
            raise ValueError("initial_value must be specified.")
        init_from_fn = callable(initial_value)

        if (isinstance(initial_value, ops.Tensor)
                and hasattr(initial_value, "graph")
                and initial_value.graph.building_function):
            raise ValueError(
                "Tensor-typed variable initializers must either be "
                "wrapped in an init_scope or callable "
                "(e.g., `tf.Variable(lambda : "
                "tf.truncated_normal([10, 40]))`) when building "
                "functions. Please file a feature request if this "
                "restriction inconveniences you.")

        if collections is None:
            collections = [ops.GraphKeys.GLOBAL_VARIABLES]
        if not isinstance(collections, (list, tuple, set)):
            raise ValueError(
                "collections argument to Variable constructor must be a list, tuple, "
                "or set. Got %s of type %s" % (collections, type(collections)))
        if constraint is not None and not callable(constraint):
            raise ValueError("The `constraint` argument must be a callable.")

        if isinstance(initial_value, trackable.CheckpointInitialValue):
            self._maybe_initialize_trackable()
            self._update_uid = initial_value.checkpoint_position.restore_uid
            initial_value = initial_value.wrapped_value

        if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections:
            collections = list(collections) + [
                ops.GraphKeys.TRAINABLE_VARIABLES
            ]
        with ops.init_scope():
            self._in_graph_mode = not context.executing_eagerly()
            with ops.name_scope(
                    name, "TrainableWrapper",
                [] if init_from_fn else [initial_value]) as name:
                # pylint: disable=protected-access
                handle_name = ops.name_from_scope_name(name)
                handle_name = handle_name or "TrainableWrapperHandle"
                if self._in_graph_mode:
                    shared_name = handle_name
                    unique_id = shared_name
                else:
                    # When in eager mode use a uid for the shared_name, to prevent
                    # accidental sharing.
                    unique_id = "%s_%d" % (handle_name, ops.uid())
                    shared_name = None  # Never shared
                # Use attr_scope and device(None) to simulate the behavior of
                # colocate_with when the variable we want to colocate with doesn't
                # yet exist.
                device_context_manager = (ops.device if self._in_graph_mode
                                          else ops.NullContextmanager)
                attr = attr_value_pb2.AttrValue(
                    list=attr_value_pb2.AttrValue.ListValue(
                        s=[compat.as_bytes("loc:@%s" % handle_name)]))
                with ops.get_default_graph()._attr_scope({"_class": attr}):
                    with ops.name_scope("Initializer"), device_context_manager(
                            None):
                        initial_value = ops.convert_to_tensor(
                            initial_value() if init_from_fn else initial_value,
                            name="initial_value",
                            dtype=dtype,
                        )
                    if shape is None:
                        shape = initial_value.shape
                    handle = resource_variable_ops.eager_safe_variable_handle(
                        initial_value=initial_value,
                        shape=None,  # shape,
                        shared_name=shared_name,
                        name=name,
                        graph_mode=self._in_graph_mode,
                    )
                # pylint: disable=protected-access
                if (self._in_graph_mode and initial_value is not None
                        and initial_value.op._get_control_flow_context()
                        is not None):
                    raise ValueError(
                        "Initializer for variable %s is from inside a control-flow "
                        "construct, such as a loop or conditional. When creating a "
                        "variable inside a loop or conditional, use a lambda as the "
                        "initializer." % name)
                # pylint: enable=protected-access
                dtype = initial_value.dtype.base_dtype

                if self._in_graph_mode:
                    with ops.name_scope("IsInitialized"):
                        is_initialized_op = (gen_resource_variable_ops.
                                             var_is_initialized_op(handle))
                    if initial_value is not None:
                        # pylint: disable=g-backslash-continuation
                        with ops.name_scope("Assign") as n, ops.colocate_with(
                                None, ignore_existing=True), ops.device(
                                    handle.device):
                            # pylint: disable=protected-access
                            initializer_op = gen_resource_variable_ops.assign_variable_op(
                                handle,
                                variables.
                                _try_guard_against_uninitialized_dependencies(
                                    name, initial_value),
                                name=n,
                            )
                            # pylint: enable=protected-access
                        # pylint: enable=g-backslash-continuation
                    with ops.name_scope("Read"):
                        # Manually assign reads to the handle's device to avoid log
                        # messages.
                        with ops.device(handle.device):
                            with ops.control_dependencies([
                                    gen_resource_variable_ops.
                                    assign_variable_op(
                                        handle,
                                        self.prefetch_values(),
                                        name="AssignBeforeInitRead",
                                    )
                            ]):
                                value = gen_resource_variable_ops.read_variable_op(
                                    handle, dtype)
                        graph_element = value
                        if caching_device is not None:
                            # Variables may be created in a tf.device() or ops.colocate_with()
                            # context. At the same time, users would expect caching device to
                            # be independent of this context, and/or would not expect the
                            # current device context to be merged with the caching device
                            # spec.  Therefore we reset the colocation stack before creating
                            # the cached value. Note that resetting the colocation stack will
                            # also reset the device stack.
                            with ops.colocate_with(None, ignore_existing=True):
                                with ops.device(caching_device):
                                    cached_value = array_ops.identity(value)
                        else:
                            cached_value = None
                else:
                    gen_resource_variable_ops.assign_variable_op(
                        handle, initial_value)
                    is_initialized_op = None
                    initializer_op = None
                    graph_element = None
                    if caching_device:
                        with ops.device(caching_device):
                            with ops.control_dependencies([
                                    gen_resource_variable_ops.
                                    assign_variable_op(
                                        handle,
                                        self.prefetch_values(),
                                        name="AssignBeforeInitRead",
                                    )
                            ]):
                                cached_value = (
                                    gen_resource_variable_ops.read_variable_op(
                                        handle, dtype))
                    else:
                        cached_value = None
                if not context.executing_eagerly():
                    # Eager variables are only added to collections if they are part of an
                    # eager variable store (otherwise in an interactive session they would
                    # hog memory and cause OOM). This is done in ops/variable_scope.py.
                    ops.add_to_collections(collections, self)
                elif ops.GraphKeys.GLOBAL_STEP in collections:
                    ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self)
            initial_value = initial_value if self._in_graph_mode else None
            super(resource_variable_ops.ResourceVariable, self).__init__(
                trainable=trainable,
                shape=shape,
                dtype=dtype,
                handle=handle,
                synchronization=synchronization,
                constraint=constraint,
                aggregation=aggregation,
                distribute_strategy=distribute_strategy,
                name=name,
                unique_id=unique_id,
                handle_name=handle_name,
                graph_element=graph_element,
                initial_value=initial_value,
                initializer_op=initializer_op,
                is_initialized_op=is_initialized_op,
                cached_value=cached_value,
            )
Example #53
0
 def variable(self, name, value, dtype):
     with ops.init_scope():
         if name not in self.variables:
             self.variables[name] = variables.Variable(value, dtype=dtype)
             self.evaluate(self.variables[name].initializer)
     return self.variables[name]
def _zero_debias(strategy, unbiased_var, value, decay):
    """Compute the delta required for a debiased Variable.

  All exponential moving averages initialized with Tensors are initialized to 0,
  and therefore are biased to 0. Variables initialized to 0 and used as EMAs are
  similarly biased. This function creates the debias updated amount according to
  a scale factor, as in (Kingma et al., 2015).

  To demonstrate the bias the results from 0-initialization, take an EMA that
  was initialized to `0` with decay `b`. After `t` timesteps of seeing the
  constant `c`, the variable have the following value:

  ```
    EMA = 0*b^(t) + c*(1 - b)*b^(t-1) + c*(1 - b)*b^(t-2) + ...
        = c*(1 - b^t)
  ```

  To have the true value `c`, we would divide by the scale factor `1 - b^t`.

  In order to perform debiasing, we use two shadow variables. One keeps track of
  the biased estimate, and the other keeps track of the number of updates that
  have occurred.

  Args:
    strategy: `Strategy` used to create and update variables.
    unbiased_var: A Variable representing the current value of the unbiased EMA.
    value: A Tensor representing the most recent value.
    decay: A Tensor representing `1-decay` for the EMA.

  Returns:
    The amount that the unbiased variable should be updated. Computing this
    tensor will also update the shadow variables appropriately.

  References:
    Adam - A Method for Stochastic Optimization:
      [Kingma et al., 2015](https://arxiv.org/abs/1412.6980)
      ([pdf](https://arxiv.org/pdf/1412.6980.pdf))

  """
    with variable_scope.variable_scope(unbiased_var.name[:-len(":0")],
                                       values=[unbiased_var, value, decay]):
        with ops.init_scope():
            biased_initializer = init_ops.zeros_initializer()
            local_step_initializer = init_ops.zeros_initializer()

        def _maybe_get_unique(name):
            """Get name for a unique variable, if not `reuse=True`."""
            if variable_scope.get_variable_scope().reuse:
                return name
            vs_vars = [
                x.op.name for x in
                variable_scope.get_variable_scope().global_variables()
            ]
            full_name = variable_scope.get_variable_scope().name + "/" + name
            if full_name not in vs_vars:
                return name
            idx = 1
            while full_name + ("_%d" % idx) in vs_vars:
                idx += 1
            return name + ("_%d" % idx)

        with strategy.extended.colocate_vars_with(unbiased_var):
            biased_var = variable_scope.get_variable(
                _maybe_get_unique("biased"),
                initializer=biased_initializer,
                shape=unbiased_var.get_shape(),
                dtype=unbiased_var.dtype,
                trainable=False)
            local_step = variable_scope.get_variable(
                _maybe_get_unique("local_step"),
                shape=[],
                dtype=unbiased_var.dtype,
                initializer=local_step_initializer,
                trainable=False)

    def update_fn(v, value, biased_var, local_step):
        update_biased = state_ops.assign_sub(biased_var,
                                             (biased_var - value) * decay)
        update_local_step = local_step.assign_add(1)

        # This function gets `1 - decay`, so use `1.0 - decay` in the exponent.
        bias_factor = 1 - math_ops.pow(1.0 - decay, update_local_step)
        return state_ops.assign(v,
                                update_biased / bias_factor,
                                name=ops.get_name_scope() + "/")

    return strategy.extended.update(unbiased_var,
                                    update_fn,
                                    args=(value, biased_var, local_step))
Example #55
0
    def add_weight(self,
                   name,
                   shape,
                   dtype=None,
                   initializer=None,
                   regularizer=None,
                   trainable=None,
                   constraint=None,
                   use_resource=None,
                   synchronization=vs.VariableSynchronization.AUTO,
                   aggregation=vs.VariableAggregation.NONE,
                   partitioner=None,
                   **kwargs):
        """Adds a new variable to the layer, or gets an existing one; returns it.

    Arguments:
      name: variable name.
      shape: variable shape.
      dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
      initializer: initializer instance (callable).
      regularizer: regularizer instance (callable).
      trainable: whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
        Note, if the current variable scope is marked as non-trainable
        then this parameter is ignored and any added variables are also
        marked as non-trainable. `trainable` defaults to `True` unless
        `synchronization` is set to `ON_READ`.
      constraint: constraint instance (callable).
      use_resource: Whether to use `ResourceVariable`.
      synchronization: Indicates when a distributed a variable will be
        aggregated. Accepted values are constants defined in the class
        `tf.VariableSynchronization`. By default the synchronization is set to
        `AUTO` and the current `DistributionStrategy` chooses
        when to synchronize. If `synchronization` is set to `ON_READ`,
        `trainable` must not be set to `True`.
      aggregation: Indicates how a distributed variable will be aggregated.
        Accepted values are constants defined in the class
        `tf.VariableAggregation`.
      partitioner: (optional) partitioner instance (callable).  If
        provided, when the requested variable is created it will be split
        into multiple partitions according to `partitioner`.  In this case,
        an instance of `PartitionedVariable` is returned.  Available
        partitioners include `tf.compat.v1.fixed_size_partitioner` and
        `tf.compat.v1.variable_axis_size_partitioner`.  For more details, see
        the documentation of `tf.compat.v1.get_variable` and the  "Variable
        Partitioners and Sharding" section of the API guide.
      **kwargs: Additional keyword arguments.

    Returns:
      The created variable.  Usually either a `Variable` or `ResourceVariable`
      instance.  If `partitioner` is not `None`, a `PartitionedVariable`
      instance is returned.

    Raises:
      RuntimeError: If called with partitioned variable regularization and
        eager execution is enabled.
      ValueError: When trainable has been set to True with synchronization
        set as `ON_READ`.
    """
        for kwarg in kwargs:
            if kwarg != 'experimental_autocast':
                raise TypeError('Unknown keyword argument:', kwarg)
        if self._keras_style:
            return super(Layer, self).add_weight(
                name=name,
                shape=shape,
                dtype=dtype,
                initializer=initializer,
                regularizer=regularizer,
                trainable=trainable and self.trainable,
                constraint=constraint,
                use_resource=use_resource,
                synchronization=vs.VariableSynchronization.AUTO,
                aggregation=vs.VariableAggregation.NONE,
                partitioner=partitioner,
                **kwargs)

        if synchronization == vs.VariableSynchronization.ON_READ:
            if trainable:
                raise ValueError(
                    'Synchronization value can be set to '
                    'VariableSynchronization.ON_READ only for non-trainable variables. '
                    'You have specified trainable=True and '
                    'synchronization=VariableSynchronization.ON_READ.')
            else:
                # Set trainable to be false when variable is to be synced on read.
                trainable = False
        elif trainable is None:
            trainable = True

        def _should_add_regularizer(variable, existing_variable_set):
            if base_layer_utils.is_split_variable(variable):
                for var in variable:
                    if var in existing_variable_set:
                        return False
                return True
            else:
                return variable not in existing_variable_set

        init_graph = None
        if not context.executing_eagerly():
            default_graph = ops.get_default_graph()
            if default_graph.building_function:
                with ops.init_scope():
                    # Retrieve the variables from the graph into which variables
                    # will be lifted; if initialization ops will be lifted into
                    # the eager context, then there is nothing to retrieve, since variable
                    # collections are not supported when eager execution is enabled.
                    if not context.executing_eagerly():
                        init_graph = ops.get_default_graph()
                        existing_variables = set(
                            tf_variables.global_variables())
            else:
                # Initialization ops will not be lifted out of the default graph.
                init_graph = default_graph
                existing_variables = set(tf_variables.global_variables())

        if dtype is None:
            dtype = self.dtype or dtypes.float32

        self._set_scope(None)
        reuse = self.built or self._reuse
        prev_len_trainable = len(self._trainable_weights)
        with vs.variable_scope(self._scope,
                               reuse=reuse,
                               auxiliary_name_scope=False) as scope:
            self._current_scope = scope
            with backend.name_scope(self._name_scope()):
                use_resource = (use_resource or self._use_resource_variables
                                or scope.use_resource)
                if initializer is None:
                    initializer = scope.initializer
                variable = super(Layer, self).add_weight(
                    name,
                    shape,
                    dtype=dtypes.as_dtype(dtype),
                    initializer=initializer,
                    trainable=trainable and self.trainable,
                    constraint=constraint,
                    partitioner=partitioner,
                    use_resource=use_resource,
                    synchronization=synchronization,
                    aggregation=aggregation,
                    getter=vs.get_variable,
                    **kwargs)

                if regularizer:
                    if (ops.executing_eagerly_outside_functions()
                            or _should_add_regularizer(variable,
                                                       existing_variables)):
                        self._handle_weight_regularization(
                            name, variable, regularizer)

                if init_graph is not None:
                    # Handle edge case where a custom getter has overridden `trainable`.
                    # There is one known occurrence of this, in unit test
                    # testBasicRNNCellNotTrainable in
                    # contrib.rnn.python.kernel_tests.core_rnn_cell_test
                    with init_graph.as_default():
                        trainable_variables = tf_variables.trainable_variables(
                        )
                    if (trainable and self.trainable
                            and variable not in trainable_variables):
                        # A custom getter / variable scope overrode the trainable flag.
                        extra_trainable_vars = self._trainable_weights[
                            prev_len_trainable:]
                        self._trainable_weights = self._trainable_weights[:
                                                                          prev_len_trainable]
                        self._non_trainable_weights += extra_trainable_vars
        return variable
    def _real_mirrored_creator(devices, *args, **kwargs):
      """Creates one MirroredVariable on the current worker."""
      unique_var_name = ops.get_default_graph().unique_name(
          kwargs["name"], mark_as_used=False).rstrip("/")
      # pylint: disable=protected-access
      collective_instance_key = self._collective_keys.get_instance_key(
          key_id=unique_var_name)
      # Only the first device participles in the broadcast of initial values.
      group_key = self._collective_keys.get_group_key([devices[0]])
      group_size = self._num_workers
      if "initial_value" not in kwargs:
        raise ValueError("Initial value must be specified.")
      initial_value = kwargs["initial_value"]
      if callable(initial_value):
        initial_value_fn = initial_value
      else:
        initial_value_fn = lambda: initial_value

      value_list = []
      for i, d in enumerate(devices):
        with ops.init_scope(), ops.device(d):
          if i == 0:
            # The initial value fn makes sure variables all initialized to
            # same values. The first device of the chief worker will send their
            # variable values to other workers.
            def _overridden_initial_value_fn(device=d, index=i):  # pylint: disable=g-missing-docstring
              with ops.device(device):
                initial_value = initial_value_fn()
                assert not callable(initial_value)
                initial_value = ops.convert_to_tensor(initial_value)

                assert index == 0, index
                if self._num_workers > 1:
                  if self._is_chief:
                    bcast_send = collective_ops.broadcast_send(
                        initial_value, initial_value.shape, initial_value.dtype,
                        group_size, group_key, collective_instance_key)
                    with ops.control_dependencies([bcast_send]):
                      return array_ops.identity(initial_value)
                  else:
                    return collective_ops.broadcast_recv(
                        initial_value.shape, initial_value.dtype, group_size,
                        group_key, collective_instance_key)
                return initial_value
          else:
            # Give replicas meaningful distinct names:
            var0name = value_list[0].name.split(":")[0]
            # We append a / to variable names created on replicas with id > 0 to
            # ensure that we ignore the name scope and instead use the given
            # name as the absolute name of the variable.
            kwargs["name"] = "%s/replica_%d/" % (var0name, i)

            # Variables on non-first replica get initial values from the
            # variables created on the first device of each worker.
            def _overridden_initial_value_fn(device=d, index=i):
              assert index > 0
              with ops.device(device):
                if context.executing_eagerly():
                  return array_ops.identity(value_list[0].value())
                else:
                  return array_ops.identity(value_list[0].initial_value)

          kwargs["initial_value"] = _overridden_initial_value_fn
          with context.device_policy(context.DEVICE_PLACEMENT_SILENT):
            # Don't record operations (e.g. other variable reads) during
            # variable creation.
            with tape.stop_recording():
              v = next_creator(*args, **kwargs)

          if i == 0:
            actual_var_name = v.name.split(":")[0]
            assert unique_var_name == actual_var_name, "%r vs %r" % (
                unique_var_name, actual_var_name)
          assert not isinstance(v, values.DistributedVariable)
          value_list.append(v)
      return value_list
Example #57
0
    def load(self, tags):
        """Creates an object from the MetaGraph identified by `tags`."""
        meta_graph_def = self.get_meta_graph_def_from_tags(tags)
        load_shared_name_suffix = "_load_{}".format(ops.uid())
        functions = function_deserialization.load_function_def_library(
            meta_graph_def.graph_def.library,
            load_shared_name_suffix=load_shared_name_suffix)
        # Replace existing functions in the MetaGraphDef with renamed functions so
        # we don't have duplicates or name collisions.
        meta_graph_def.graph_def.library.Clear()
        for function in functions.values():
            meta_graph_def.graph_def.library.function.add().CopyFrom(
                function.function_def)
        # We've renamed functions and shared names. We need the same operation on
        # the GraphDef itself for consistency.
        for node_def in meta_graph_def.graph_def.node:
            function_deserialization.fix_node_def(
                node_def,
                functions,
                load_shared_name_suffix,
                debug_name="MetaGraph import")

        load_graph_returns = [None]
        wrapped = wrap_function.wrap_function(functools.partial(
            self.load_graph, load_graph_returns, meta_graph_def),
                                              signature=[])
        saver, = load_graph_returns
        self.restore_variables(wrapped, saver)
        with wrapped.graph.as_default():
            init_op = loader_impl.get_init_op(
                meta_graph_def
            ) or monitored_session.Scaffold.default_local_init_op()
            # Add a dummy Tensor we know we can fetch to add control dependencies to.
            init_anchor = constant_op.constant(0., name="dummy_fetch")

        root = tracking.AutoTrackable()
        asset_feed_tensors = []
        asset_paths = []
        for tensor_name, value in loader_impl.get_asset_tensors(
                self._export_dir, meta_graph_def).items():
            asset_feed_tensors.append(
                wrapped.graph.as_graph_element(tensor_name))
            asset_paths.append(tracking.TrackableAsset(value))
        init_fn = wrapped.prune(
            feeds=asset_feed_tensors,
            fetches=[init_anchor,
                     wrapped.graph.as_graph_element(init_op)])
        initializer = _Initializer(init_fn, asset_paths)
        # pylint: disable=protected-access
        local_init_op, _ = initializer._initialize()
        # pylint: enable=protected-access
        with ops.init_scope():
            if not context.executing_eagerly():
                ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS,
                                      local_init_op)
                for variable in wrapped.graph.get_collection_ref(
                        ops.GraphKeys.LOCAL_VARIABLES):
                    # pylint: disable=protected-access
                    variable._initializer_op = local_init_op
                    # pylint: enable=protected-access
        root.initializer = initializer
        root.asset_paths = asset_paths
        signature_functions = self._extract_signatures(wrapped, meta_graph_def)

        root.signatures = signature_serialization.create_signature_map(
            signature_functions)
        root.variables = list(wrapped.graph.variables)
        root.tensorflow_version = (
            meta_graph_def.meta_info_def.tensorflow_version)
        root.tensorflow_git_version = (
            meta_graph_def.meta_info_def.tensorflow_git_version)
        root.graph = wrapped.graph
        root.prune = wrapped.prune
        return root
Example #58
0
def _create_keras_history_helper(tensors, processed_ops, created_layers):
    """Helper method for `create_keras_history`.

  Arguments:
    tensors: A structure of Tensors for which to create Keras metadata.
    processed_ops: Set. TensorFlow operations that have already been wrapped in
      `TensorFlowOpLayer` instances.
    created_layers: List. The `TensorFlowOpLayer` instances created.

  Returns:
    Tuple. First element is the updated set of TensorFlow Operations that
    have been wrapped in `TensorFlowOpLayer` instances. Second element is
    a list of the `TensorFlowOpLayer` instances created.
  """
    # Import of `base_layer` needed in order to create `TensorFlowOpLayer`.
    # Cannot be imported at top because of circular dependencies.
    # TODO(omalleyt): Resolve circular dependency.
    from tensorflow.python.keras.engine import base_layer  # pylint: disable=g-import-not-at-top
    tensor_list = nest.flatten(tensors)
    for tensor in tensor_list:
        if getattr(tensor, '_keras_history', None) is not None:
            continue
        op = tensor.op  # The Op that created this Tensor.
        if op not in processed_ops:
            if op.type.startswith('Sparse'):
                lambda_example = """
        weights_mult = lambda x: tf.sparse.sparse_dense_matmul(x, weights)
        output = tf.keras.layers.Lambda(weights_mult)(input)
        """
                raise ValueError(
                    'Sparse ops are not supported with functional models with built-in '
                    'layer wrapping. Please wrap the sparse ops in a Lambda layer like'
                    ': \n{lambda_example}\n'.format(
                        lambda_example=lambda_example))

            # Recursively set `_keras_history`.
            op_inputs = list(op.inputs)
            constants = {}
            layer_inputs = []
            for i, op_input in enumerate(op_inputs):
                if uses_keras_history(op_input):
                    layer_inputs.append(op_input)
                else:
                    # Treat any value not originating from a `keras.Input` as
                    # a constant. Variables cannot be supported.
                    ds_with_session = (
                        distribution_strategy_context.in_cross_replica_context(
                        ) and not ops.executing_eagerly_outside_functions())
                    using_xla = control_flow_util.GraphOrParentsInXlaContext(
                        ops.get_default_graph())
                    if ds_with_session or using_xla:
                        # In Legacy Graph mode, evaluating here makes Session be
                        # configured improperly. The downside of this is that saving
                        # via `get_config` breaks, but SavedModel still works.
                        constants[i] = op_input
                    else:
                        with ops.init_scope():
                            constants[i] = backend.function([], op_input)([])
            layer_inputs = unnest_if_single_tensor(layer_inputs)
            processed_ops, created_layers = _create_keras_history_helper(
                layer_inputs, processed_ops, created_layers)
            name = op.name
            node_def = op.node_def.SerializeToString()
            op_layer = base_layer.TensorFlowOpLayer(node_def,
                                                    constants=constants,
                                                    name=name)
            created_layers.append(op_layer)
            op_layer._add_inbound_node(  # pylint: disable=protected-access
                layer_inputs, op.outputs)
            processed_ops.update([op])
    return processed_ops, created_layers
Example #59
0
    def _add_variable_with_custom_getter(self,
                                         name,
                                         shape=None,
                                         dtype=dtypes.float32,
                                         initializer=None,
                                         getter=None,
                                         overwrite=False,
                                         **kwargs_for_getter):
        """Restore-on-create for a variable be saved with this `Checkpointable`.

    If the user has requested that this object or another `Checkpointable` which
    depends on this object be restored from a checkpoint (deferred loading
    before variable object creation), `initializer` may be ignored and the value
    from the checkpoint used instead.

    Args:
      name: A name for the variable. Must be unique within this object.
      shape: The shape of the variable.
      dtype: The data type of the variable.
      initializer: The initializer to use. Ignored if there is a deferred
        restoration left over from a call to
        `_restore_from_checkpoint_position`.
      getter: The getter to wrap which actually fetches the variable.
      overwrite: If True, disables unique name and type checks.
      **kwargs_for_getter: Passed to the getter.

    Returns:
      The new variable object.

    Raises:
      ValueError: If the variable name is not unique.
    """
        self._maybe_initialize_checkpointable()
        with ops.init_scope():
            if context.executing_eagerly():
                # If this is a variable with a single Tensor stored in the checkpoint,
                # we can set that value as an initializer rather than initializing and
                # then assigning (when executing eagerly). This call returns None if
                # there is nothing to restore.
                checkpoint_initializer = self._preload_simple_restoration(
                    name=name, shape=shape)
            else:
                checkpoint_initializer = None
            if (checkpoint_initializer is not None and
                    not (isinstance(initializer, CheckpointInitialValue) and
                         (initializer.restore_uid >
                          checkpoint_initializer.restore_uid))):
                # If multiple Checkpointable objects are "creating" the same variable
                # via the magic of custom getters, the one with the highest restore UID
                # (the one called last) has to make the final initializer. If another
                # custom getter interrupts this process by overwriting the initializer,
                # then we'll catch that when we call _track_checkpointable. So this is
                # "best effort" to set the initializer with the highest restore UID.
                initializer = checkpoint_initializer
                shape = None
        new_variable = getter(name=name,
                              shape=shape,
                              dtype=dtype,
                              initializer=initializer,
                              **kwargs_for_getter)

        # If we set an initializer and the variable processed it, tracking will not
        # assign again. It will add this variable to our dependencies, and if there
        # is a non-trivial restoration queued, it will handle that. This also
        # handles slot variables.
        if not overwrite or isinstance(new_variable, CheckpointableBase):
            return self._track_checkpointable(new_variable,
                                              name=name,
                                              overwrite=overwrite)
        else:
            # TODO(allenl): Some variable types are not yet supported. Remove this
            # fallback once all get_variable() return types are Checkpointable.
            return new_variable
Example #60
0
    def __init__(
            self,  # pylint: disable=super-init-not-called
            initial_value=None,
            trainable=None,
            caching_device=None,
            name=None,
            dtype=None,
            constraint=None,
            add_initializers_to=None,
            **unused_kwargs):
        """Creates a variable.

    Args:
      initial_value: A `Tensor`, or Python object convertible to a `Tensor`,
        which is the initial value for the Variable. The initial value must have
        a shape specified unless `validate_shape` is set to False. Can also be a
        callable with no argument that returns the initial value when called.
        (Note that initializer functions from init_ops.py must first be bound
         to a shape before being used here.)
      trainable: If `True`, GradientTapes automatically watch uses of this
        Variable.
      caching_device: Optional device string or function describing where the
        Variable should be cached for reading.  Defaults to the Variable's
        device.  If not `None`, caches on another device.  Typical use is to
        cache on the device where the Ops using the Variable reside, to
        deduplicate copying through `Switch` and other conditional statements.
      name: Optional name for the variable. Defaults to `'Variable'` and gets
        uniquified automatically.
      dtype: If set, initial_value will be converted to the given type.
        If None, either the datatype will be kept (if initial_value is
       a Tensor) or float32 will be used (if it is a Python object convertible
       to a Tensor).
      constraint: An optional projection function to be applied to the variable
        after being updated by an `Optimizer` (e.g. used to implement norm
        constraints or value constraints for layer weights). The function must
        take as input the unprojected Tensor representing the value of the
        variable and return the Tensor for the projected value
        (which must have the same shape). Constraints are not safe to
        use when doing asynchronous distributed training.
      add_initializers_to: if not None and not in legacy graph mode, the
        initializer tensor will be added to this map instead of adding the
        assignment to the function.

    Raises:
      ValueError: If the initial value is not specified, or does not have a
        shape and `validate_shape` is `True`.
      RuntimeError: If called outside of a function definition.
    """
        if context.executing_eagerly():
            # If we've been init_scope()d out of the function definition nothing to do
            # here; we can't really do the capturing or conditional logic.
            resource_variable_ops.ResourceVariable.__init__(
                self,
                initial_value=initial_value,
                trainable=trainable,
                caching_device=caching_device,
                name=name,
                dtype=dtype,
                constraint=constraint)
            return
        with ops.init_scope():
            self._in_graph_mode = not context.executing_eagerly()
        if initial_value is None:
            raise ValueError("initial_value must be specified.")
        init_from_fn = callable(initial_value)

        if constraint is not None and not callable(constraint):
            raise ValueError("The `constraint` argument must be a callable.")

        if isinstance(initial_value, checkpointable.CheckpointInitialValue):
            self._maybe_initialize_checkpointable()
            self._update_uid = initial_value.checkpoint_position.restore_uid
            initial_value = initial_value.wrapped_value

        if trainable is None:
            trainable = True
        self._trainable = trainable
        self._save_slice_info = None
        self._initial_value = None
        self._initializer_op = None
        self._is_initialized_op = None
        self._graph_element = None
        self._cached_value = None
        # Store the graph key so optimizers know how to only retrieve variables from
        # this graph. Guaranteed to be the same as the eager graph_key.
        self._graph_key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
        with ops.name_scope(name, "Variable",
                            [] if init_from_fn else [initial_value]) as name:
            # pylint: disable=protected-access
            with ops.init_scope():
                shared_name = ops._name_from_scope_name(name)
                shared_name = "%s_%d" % (shared_name, ops.uid())
            with ops.name_scope("Initializer"), ops.device(None):
                initial_value = ops.convert_to_tensor(
                    initial_value() if init_from_fn else initial_value,
                    name="initial_value",
                    dtype=dtype)
            with ops.init_scope():
                self._handle = resource_variable_ops.eager_safe_variable_handle(
                    shape=initial_value.get_shape(),
                    dtype=initial_value.dtype.base_dtype,
                    shared_name=shared_name,
                    name=name,
                    graph_mode=self._in_graph_mode)
            self._shape = initial_value.shape
            self._unique_id = shared_name
            self._handle_name = shared_name + ":0"
            self._dtype = initial_value.dtype.base_dtype
            self._constraint = constraint
            assert initial_value is not None
            if self._in_graph_mode:
                with ops.init_scope():
                    outer_graph = ops.get_default_graph()
                lifted_initializer = lift_to_graph.lift_to_graph(
                    initial_value, outer_graph)[initial_value]
                with ops.init_scope():
                    self._initial_value = lifted_initializer
                    with ops.name_scope("IsInitialized"):
                        self._is_initialized_op = (
                            resource_variable_ops.var_is_initialized_op(
                                self._handle))
                    if initial_value is not None:
                        with ops.name_scope("Assign") as n, ops.colocate_with(
                                self._handle):
                            self._initializer_op = resource_variable_ops.assign_variable_op(
                                self._handle, lifted_initializer, name=n)
                    with ops.name_scope("Read"), ops.colocate_with(
                            self._handle):
                        # Manually assign reads to the handle's device to avoid log
                        # messages.
                        with ops.device(self._handle.device):
                            value = self._read_variable_op()
                        self._graph_element = value
                    ops.add_to_collection(ops.GraphKeys.GLOBAL_VARIABLES, self)
            else:
                if add_initializers_to is not None:
                    add_initializers_to[self] = initial_value
                else:

                    def assign_fn():
                        with ops.name_scope("Assign") as n, ops.colocate_with(
                                self._handle):
                            resource_variable_ops.assign_variable_op(
                                self._handle, initial_value, name=n)
                            # Returning values to keep tf.cond happy.
                        return ops.convert_to_tensor(1)

                    def not_assign_fn():
                        return ops.convert_to_tensor(0)

                    # Note: this cond is always guaranteed to run because we're inside a
                    # defun which will insert automatic control dependencies.
                    control_flow_ops.cond(
                        resource_variable_ops.var_is_initialized_op(
                            self._handle), not_assign_fn, assign_fn)

        # After the handle has been created, set up a way to clean it up when
        # executing eagerly. We'll hold the only reference to the deleter, so that
        # when this object is garbage collected the deleter will be too. This
        # means ResourceVariables can be part of reference cycles without those
        # cycles being uncollectable.
        if not self._in_graph_mode:
            self._handle_deleter = resource_variable_ops.EagerResourceDeleter(
                handle=self._handle, handle_device=self._handle.device)
        self._cached_shape_as_list = None