Example #1
def _build_learning_rate(self, learning_rate):
     if isinstance(learning_rate,
                   learning_rate_schedule.LearningRateSchedule):
         # Create a variable to hold the current learning rate.
         # Note that the init value `learning_rate(self.iterations)` should have
         # the correct layout information from self.iterations.
         self._current_learning_rate = dtensor.DVariable(
             learning_rate(self.iterations),
             name='learning_rate',
             dtype=tf.float32)
         return learning_rate
     init_val = tf.constant(learning_rate, dtype=tf.float32)
     if self._mesh:
         init_val = dtensor.copy_to_mesh(
             init_val, dtensor.Layout.replicated(self._mesh, rank=0))
     return dtensor.DVariable(init_val, name='learning_rate')
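A minimal usage sketch of the two branches above, assuming a single-device mesh and the `optimizers.Adam` entry point used in the tests below; names outside this page's snippets are assumptions, not part of the source.

import tensorflow as tf
from tensorflow.experimental import dtensor

mesh = dtensor.create_mesh([('batch', 1)], devices=['CPU:0'])
# Scalar branch: the rate is stored as a replicated rank-0 DVariable.
adam = optimizers.Adam(learning_rate=0.001, mesh=mesh)
# Schedule branch: the schedule object is returned as-is, and a DVariable
# tracking the current value is created from `learning_rate(self.iterations)`.
schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1, decay_steps=100, decay_rate=0.9)
adam_scheduled = optimizers.Adam(learning_rate=schedule, mesh=mesh)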
Example #2
def _create_iteration_variable(self):
     init_val = tf.constant(0, dtype=tf.int64)
     if self._mesh:
         init_val = dtensor.copy_to_mesh(
             init_val, dtensor.Layout.replicated(self._mesh, rank=0))
     with tf.init_scope():
        # Lift the variable creation into the init scope so it happens
        # eagerly, even when the optimizer is first used inside a tf.function.
         self._iterations = dtensor.DVariable(init_val, name='iteration')
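The `tf.init_scope` above matters when the optimizer is first exercised inside a `tf.function`: it lifts variable creation out of the traced graph. A self-contained illustration of the same pattern with a plain tf.Variable (not DTensor-specific):

import tensorflow as tf

class Counter:
    def __init__(self):
        self._iterations = None

    @tf.function
    def step(self):
        if self._iterations is None:
            with tf.init_scope():
                # Created eagerly during the first trace, exactly once.
                self._iterations = tf.Variable(0, dtype=tf.int64)
        return self._iterations.assign_add(1)

counter = Counter()
print(counter.step())  # 1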
Example #3
def test_build_index_dict(self):
     optimizer = optimizers.Adam(mesh=self.mesh)
     variable_init_value = tf.ones(shape=(),
                                   dtype=tf.float32,
                                   layout=dtensor.Layout.replicated(
                                       self.mesh, rank=0))
     var_list = [
         dtensor.DVariable(variable_init_value, name=f'var{i}')
         for i in range(10)
     ]
     optimizer._build_index_dict(var_list)
     self.assertEqual(
         optimizer._index_dict[optimizer._var_key(var_list[7])], 7)
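The assertion relies on `_build_index_dict` mapping each variable's key to its position in `var_list`, so slot lookups during `apply_gradients` are O(1). A plain-Python sketch of the same idea; `id()` stands in for the internal `_var_key`:

def build_index_dict(var_list):
    # Map each variable's key to its position in the list.
    return {id(var): i for i, var in enumerate(var_list)}

index = build_index_dict(var_list)
assert index[id(var_list[7])] == 7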
Example #4
def test_add_variable_from_reference(self):
     optimizer = optimizers.Adam(mesh=self.mesh)
     variable_init_value = tf.ones([4, 4],
                                   dtype=tf.float32,
                                   layout=dtensor.Layout.replicated(
                                       self.mesh, rank=2))
     model_variable = dtensor.DVariable(variable_init_value,
                                        trainable=True,
                                        name='tmp')
     state_variable = optimizer.add_variable_from_reference(
         model_variable, 'test')
     self.assertEqual(state_variable._shared_name, 'test/tmp')
     self.assertAllClose(self.evaluate(state_variable), tf.zeros([4, 4]))
     # Make sure the variable contains the correct layout info
     self.assertEqual(state_variable.layout, model_variable.layout)
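Every variable in these tests uses `Layout.replicated(mesh, rank)`, which places a full copy of the tensor on each device. For contrast, a sharded layout names a mesh dimension per tensor axis; the 'batch' dimension below is a placeholder for whatever the test mesh defines:

replicated = dtensor.Layout.replicated(self.mesh, rank=2)
# Shard axis 0 across the mesh's 'batch' dimension, replicate axis 1.
sharded = dtensor.Layout(['batch', dtensor.UNSHARDED], self.mesh)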
Example #5
def make_variable(name,
                  shape=None,
                  dtype=tf.float32,
                  initializer=None,
                  layout=None,
                  trainable=None,
                  caching_device=None,
                  validate_shape=True,
                  constraint=None,
                  use_resource=None,
                  collections=None,
                  synchronization=tf.VariableSynchronization.AUTO,
                  aggregation=tf.VariableAggregation.NONE,
                  partitioner=None):
    # Note that this function is copied from keras.engine.base_layer_utils.
    # The only change is the use of tf.Variable: the original version used
    # tf.compat.v1.Variable for backward compatibility with Estimator.
    initializing_from_value = False
    if initializer is not None and not callable(initializer):
        initializing_from_value = True

    if initializing_from_value:
        init_val = initializer
        variable_dtype = None
    else:
        # Instantiate the initializer if it was provided as a class.
        if tf_inspect.isclass(initializer):
            initializer = initializer()
        init_val = functools.partial(initializer,
                                     shape,
                                     dtype=dtype,
                                     layout=layout)
        variable_dtype = dtype.base_dtype

    variable_shape = tf.TensorShape(shape)

    return dtensor.DVariable(initial_value=init_val,
                             name=name,
                             trainable=trainable,
                             caching_device=caching_device,
                             dtype=variable_dtype,
                             validate_shape=validate_shape,
                             constraint=constraint,
                             synchronization=synchronization,
                             aggregation=aggregation,
                             shape=variable_shape if variable_shape else None)
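A hedged usage sketch: creating a replicated kernel by passing an initializer class, which the helper instantiates and calls with the `layout` keyword. This assumes a DTensor-aware initializer that accepts `layout` (as the `functools.partial` call above implies) and an existing `mesh`:

kernel = make_variable(
    'kernel',
    shape=(64, 32),
    dtype=tf.float32,
    initializer=tf.keras.initializers.GlorotUniform,  # a class; instantiated inside
    layout=dtensor.Layout.replicated(mesh, rank=2),
    trainable=True)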
Example #6
    def test_apply_gradients(self, optimizer_cls, init_args,
                             expect_variable_names):
        optimizer = optimizer_cls(mesh=self.mesh, **init_args)

        self.assertEqual(self.evaluate(optimizer.iterations), 0)
        self.assertEqual(optimizer.iterations.layout,
                         dtensor.Layout.replicated(self.mesh, rank=0))

        variable_init_value = tf.ones([4, 4],
                                      dtype=tf.float32,
                                      layout=dtensor.Layout.replicated(
                                          self.mesh, rank=2))
        model_variable = dtensor.DVariable(variable_init_value, trainable=True)

        grads = tf.ones_like(variable_init_value)
        optimizer.apply_gradients(zip([grads], [model_variable]))
        optimizer_variables = optimizer.variables

        self.assertEqual(self.evaluate(optimizer.iterations), 1)

        all_names = [var._shared_name for var in optimizer_variables]
        expect_variable_names.extend(['iteration', 'learning_rate'])
        self.assertCountEqual(all_names, expect_variable_names)
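The signature implies this test is parametrized over optimizer classes, e.g. via `absl.testing.parameterized`. A hedged sketch of one driving row; the Adam slot names are inferred from the `{variable_name}/{model_variable.name}` pattern in `add_variable_from_reference` below (an unnamed DVariable defaults to the name `Variable`), not taken from the test file:

    @parameterized.named_parameters(
        # Hypothetical row: Adam keeps first- and second-moment slots.
        ('Adam', optimizers.Adam, {}, ['m/Variable', 'v/Variable']),
    )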
Example #7
def _create_dvariable(layout_map, object_path, variable):
  """Create a new variable instead of using the LazyInitVariable.

  We do this because, even though the LazyInitVariable might behave like a
  normal tf.Variable/DVariable, it is not future proof against changes to the
  variable class. It would also fail instance type checks in Python, which
  could break user code that filters variables by type.

  Args:
    layout_map: a LayoutMap which contains the variable_object_path (string) ->
      Layout.
    object_path: string, the object attribute path for the variable.
    variable: LazyInitVariable which will be replaced by the newly created
      tf.Variable.
  Returns:
    A new tf.Variable with correct layout information.
  """
  # TODO(scottzhu): Revisit this in future and see if we can just reuse the
  # LazyInitVariable rather than creating a new tf.Variable instance.
  layout = layout_map[object_path]
  if layout is None:
    variable_rank = variable.shape.rank
    layout = dtensor.Layout.replicated(
        mesh=layout_map.get_default_mesh(),
        rank=variable_rank)
  init_val = variable._initial_value  # pylint: disable=protected-access
  if callable(init_val):
    init_val = utils.call_with_layout(init_val, layout)
  else:
    # The init value has probably already been created as a tensor, so just
    # copy it to the mesh and give it the proper layout.
    init_val = dtensor.copy_to_mesh(init_val, layout)
  new_variable = dtensor.DVariable(init_val,
                                   trainable=variable.trainable,
                                   name=variable.name)
  return new_variable
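A hedged sketch of where this helper fits: a `LayoutMap` is keyed by object-path patterns, and any variable whose path matches nothing falls through to the replicated default above. The module alias `layout_map_lib` and the regex are illustrative:

layout_map = layout_map_lib.LayoutMap(mesh=mesh)
# Dense kernels get a model-parallel layout; unmatched variables are
# replicated via the fallback in _create_dvariable.
layout_map['dense.*kernel'] = dtensor.Layout(
    [dtensor.UNSHARDED, 'model'], mesh)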
Example #8
    def add_variable_from_reference(self,
                                    model_variable,
                                    variable_name,
                                    initial_value=None):
        """Create an optimizer variable from model variable.

    Create an optimizer variable based on the information of model variable.
    For example, in SGD optimizer momemtum, for each model variable, a
    corresponding momemtum variable is created of the same shape and dtype.

    Args:
      model_variable: The corresponding model variable to the optimizer variable
        to be created.
      variable_name: The name prefix of the optimizer variable to be created.
        The create variables name will follow the pattern
        `{variable_name}/{model_variable.name}`, e.g., `momemtum/dense_1`.
      initial_value: The initial value of the optimizer variable, if None, the
        value will be default to 0.

    Returns:
      An optimizer variable.
    """
        if initial_value is None:
            # Use tf.zeros_like, which propagates the layout information from
            # the model weights, if any.
            initial_value = tf.zeros_like(model_variable)
        elif isinstance(initial_value, tf.Tensor):
            initial_value = dtensor.copy_to_mesh(
                initial_value,
                dtensor.Layout.replicated(self._mesh,
                                          rank=initial_value.shape.rank))
        return dtensor.DVariable(
            initial_value=initial_value,
            name=f'{variable_name}/{model_variable._shared_name}',
            dtype=model_variable.dtype,
            trainable=False)
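A hedged sketch of how an optimizer's `build` would use this helper, following the SGD-with-momentum example from the docstring; the body is illustrative, not the actual Keras implementation:

    def build(self, var_list):
        super().build(var_list)
        # One zero-initialized momentum slot per model variable, sharing its
        # shape, dtype, and layout.
        self.momentums = [
            self.add_variable_from_reference(var, 'momentum')
            for var in var_list
        ]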