Example #1
    def test_init_subclass_model_variable_with_layout(self):
        layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
        layout_map['d1.kernel'] = self.layout_2d
        layout_map['d1.bias'] = self.layout_1d
        layout_map['d2.kernel'] = self.layout_2d
        layout_map['d2.bias'] = self.layout_1d

        with layout_map_lib.layout_map_scope(layout_map):
            model = SubclassModel(name='model')

        # Initialize the model with an eager tensor and make sure the model
        # weights get the correct layout and the model produces the correct
        # result.
        inputs = tf.zeros((10, 10))
        inputs = dtensor.copy_to_mesh(inputs, layout=self.layout_2d)
        result = model(inputs)
        self.assertAllClose(result, tf.zeros((10, 1000)))
        d1 = model.d1
        d2 = model.d2
        self.assertEqual(d1.kernel.layout, self.layout_2d)
        self.assertEqual(d1.bias.layout, self.layout_1d)
        self.assertEqual(d2.kernel.layout, self.layout_2d)
        self.assertEqual(d2.bias.layout, self.layout_1d)

        # Also make sure we repopulate the cached attributes like
        # layer._trainable_weights
        self.assertIs(d1.kernel, d1._trainable_weights[0])
        self.assertIs(d1.bias, d1._trainable_weights[1])
        self.assertIs(d2.kernel, d2._trainable_weights[0])
        self.assertIs(d2.bias, d2._trainable_weights[1])

        result = model(inputs, training=True)
        self.assertAllClose(result, tf.zeros((10, 1000),
                                             layout=self.layout_2d))
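The tests above rely on fixtures (self.mesh, self.layout_1d, self.layout_2d) and a SubclassModel definition that are not shown in the snippet. A minimal sketch of what they might look like, assuming a two-device CPU mesh and two Dense(1000) layers; all names, sizes, and devices here are assumptions:

import tensorflow as tf
from tensorflow.experimental import dtensor

# Hypothetical fixture setup (not part of the original test file). Assumes two
# (possibly virtual) CPU devices are available.
mesh = dtensor.create_mesh([("batch", 2)], devices=["CPU:0", "CPU:1"])
layout_1d = dtensor.Layout.replicated(mesh, rank=1)
layout_2d = dtensor.Layout.replicated(mesh, rank=2)


class SubclassModel(tf.keras.Model):
    # Assumed structure: attributes named 'd1' and 'd2' so that the LayoutMap
    # keys 'd1.kernel', 'd1.bias', 'd2.kernel' and 'd2.bias' match.

    def __init__(self, name=None):
        super().__init__(name=name)
        self.d1 = tf.keras.layers.Dense(1000)
        self.d2 = tf.keras.layers.Dense(1000)

    def call(self, inputs, training=None):
        return self.d2(self.d1(inputs))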
Example #2
    def test_apply_gradients(self, optimizer_cls, init_args,
                             expect_variable_names):
        optimizer = optimizer_cls(mesh=self.mesh, **init_args)

        self.assertEqual(self.evaluate(optimizer.iterations), 0)
        self.assertEqual(
            optimizer.iterations.layout,
            dtensor.Layout.replicated(self.mesh, rank=0),
        )

        variable_init_value = tf.ones([4, 4], dtype=tf.float32)
        variable_init_value = dtensor.copy_to_mesh(
            variable_init_value,
            layout=dtensor.Layout.replicated(self.mesh, rank=2),
        )
        model_variable = dtensor.DVariable(variable_init_value, trainable=True)

        grads = tf.ones_like(variable_init_value)
        optimizer.apply_gradients(zip([grads], [model_variable]))
        optimizer_variables = optimizer.variables

        self.assertEqual(self.evaluate(optimizer.iterations), 1)

        all_names = [var._shared_name for var in optimizer_variables]
        expect_variable_names.extend(["iteration", "learning_rate"])
        self.assertCountEqual(all_names, expect_variable_names)
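This test is parameterized over the optimizer class and its expected state-variable names. One plausible case is sketched below; the slot names are illustrative assumptions derived from the `{variable_name}/{model_variable._shared_name}` pattern shown later in Example #13:

# Hypothetical parameterized case (optimizer_cls, init_args,
# expect_variable_names): the model DVariable created in the test gets the
# default name 'Variable', and Adam is assumed to create 'm' and 'v' slots.
adam_case = (
    optimizers.Adam,
    {"learning_rate": 0.01},
    ["m/Variable", "v/Variable"],
)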
Example #3
  def test_layer(self, layer_cls, init_args, variable_settings, input_shape,
                 input_dtype=np.float32):
    args_with_layout = init_args.copy()
    for variable_name, variable_rank in variable_settings.items():
      args_with_layout[variable_name + '_layout'] = dtensor.Layout.replicated(
          self.mesh, variable_rank)

    layer = layer_cls(**args_with_layout)
    # inputs = np.random.random(input_shape)
    inputs = np.random.randn(*input_shape).astype(input_dtype)
    d_inputs = dtensor.copy_to_mesh(
        inputs, dtensor.Layout.replicated(self.mesh, len(input_shape)))
    d_output = layer(d_inputs)

    for variable_name, variable_rank in variable_settings.items():
      self.assertIsInstance(getattr(layer, variable_name), dtensor.DVariable)

    expected_layout = dtensor.Layout.replicated(self.mesh, d_output.shape.rank)
    self.assertEqual(dtensor.fetch_layout(d_output), expected_layout)

    # Make sure the layer produces the same output when no layout is used.
    tf_utils.set_random_seed(1337)
    layer_2 = layer_cls(**init_args)
    output = layer_2(inputs)
    self.assertAllClose(d_output, output)

    for variable_name, variable_rank in variable_settings.items():
      self.assertNotIsInstance(getattr(layer_2, variable_name),
                               dtensor.DVariable)
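The layer test is parameterized as well; a plausible set of arguments for a Dense layer, where `variable_settings` maps each created weight to its rank (the values here are illustrative):

# Hypothetical parameters for test_layer above.
dense_case = dict(
    layer_cls=layers.Dense,
    init_args={'units': 4},
    variable_settings={'kernel': 2, 'bias': 1},  # weight name -> rank
    input_shape=(8, 16),
)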
Example #4
 def _create_iteration_variable(self):
     init_val = tf.constant(0, dtype=tf.int64)
     if self._mesh:
         init_val = dtensor.copy_to_mesh(
             init_val, dtensor.Layout.replicated(self._mesh, rank=0))
     with tf.init_scope():
         # Lift the variable creation to init scope to avoid environment issues.
         self._iterations = dtensor.DVariable(init_val, name='iteration')
Example #5
    def test_init_functional_model_variable_with_layout(self):
        # Note that the functional model uses the layer name + attribute name
        # as the key. Layer names are unique within a functional model, and
        # when a layer doesn't have a name, Keras assigns it a unique name
        # based on the layer class.
        layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
        layout_map['d1.kernel'] = self.layout_2d
        layout_map['d1.bias'] = self.layout_1d
        layout_map['d2.kernel'] = self.layout_2d
        layout_map['d2.bias'] = self.layout_1d

        with layout_map_lib.layout_map_scope(layout_map):
            inputs = tf.keras.Input((10, ), batch_size=10)
            x = layers.Dense(20, name='d1')(inputs)
            x = layers.Dropout(0.1)(x)
            output = layers.Dense(30, name='d2')(x)

            model = tf.keras.Model(inputs, output)

        # The input layer is included as well.
        self.assertLen(model.layers, 4)
        d1 = model.layers[1]
        d2 = model.layers[3]

        self.assertEqual(d1.kernel.layout, self.layout_2d)
        self.assertEqual(d1.bias.layout, self.layout_1d)
        self.assertEqual(d2.kernel.layout, self.layout_2d)
        self.assertEqual(d2.bias.layout, self.layout_1d)

        # Also make sure we repopulate the cached attributes like
        # layer._trainable_weights
        self.assertIs(d1.kernel, d1._trainable_weights[0])
        self.assertIs(d1.bias, d1._trainable_weights[1])
        self.assertIs(d2.kernel, d2._trainable_weights[0])
        self.assertIs(d2.bias, d2._trainable_weights[1])

        inputs = tf.zeros((10, 10))
        inputs = dtensor.copy_to_mesh(inputs, layout=self.layout_2d)
        result = model(inputs, training=True)
        expected_result = tf.zeros((10, 30))
        expected_result = dtensor.copy_to_mesh(expected_result,
                                               layout=self.layout_2d)
        self.assertAllClose(result, expected_result)
Example #6
    def test_init_sequential_model_variable_with_layout(self):
        # Note that the sequential model uses the layer name + attribute name
        # as the key. Layer names are unique within the model, and when a
        # layer doesn't have a name, Keras assigns it a unique name based on
        # the layer class.
        layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
        layout_map["d1.kernel"] = self.layout_2d
        layout_map["d1.bias"] = self.layout_1d
        layout_map["d2.kernel"] = self.layout_2d
        layout_map["d2.bias"] = self.layout_1d

        with layout_map_lib.layout_map_scope(layout_map):
            model = tf.keras.Sequential([
                layers.Dense(20, name="d1", input_shape=(10, )),
                layers.Dropout(0.1),
                layers.Dense(30, name="d2"),
            ])

        self.assertLen(model.layers, 3)
        d1 = model.layers[0]
        d2 = model.layers[2]

        self.assertEqual(d1.kernel.layout, self.layout_2d)
        self.assertEqual(d1.bias.layout, self.layout_1d)
        self.assertEqual(d2.kernel.layout, self.layout_2d)
        self.assertEqual(d2.bias.layout, self.layout_1d)

        # Also make sure we repopulate the cached attributes like
        # layer._trainable_weights
        self.assertIs(d1.kernel, d1._trainable_weights[0])
        self.assertIs(d1.bias, d1._trainable_weights[1])
        self.assertIs(d2.kernel, d2._trainable_weights[0])
        self.assertIs(d2.bias, d2._trainable_weights[1])

        inputs = tf.zeros((10, 10))
        inputs = dtensor.copy_to_mesh(inputs, layout=self.layout_2d)
        result = model(inputs, training=True)
        expected_result = tf.zeros((10, 30))
        expected_result = dtensor.copy_to_mesh(expected_result,
                                               layout=self.layout_2d)
        self.assertAllClose(result, expected_result)
Example #7
 def test_build_index_dict(self):
     optimizer = optimizers.Adam(mesh=self.mesh)
     variable_init_value = tf.ones(shape=(), dtype=tf.float32)
     variable_init_value = dtensor.copy_to_mesh(
         variable_init_value,
         layout=dtensor.Layout.replicated(self.mesh, rank=0))
     var_list = [
         dtensor.DVariable(variable_init_value, name=f'var{i}')
         for i in range(10)
     ]
     optimizer._build_index_dict(var_list)
     self.assertEqual(
         optimizer._index_dict[optimizer._var_key(var_list[7])], 7)
Example #8
 def test_add_variable_from_reference(self):
     optimizer = optimizers.Adam(mesh=self.mesh)
     variable_init_value = tf.ones([4, 4], dtype=tf.float32)
     variable_init_value = dtensor.copy_to_mesh(
         variable_init_value,
         layout=dtensor.Layout.replicated(self.mesh, rank=2))
     model_variable = dtensor.DVariable(variable_init_value,
                                        trainable=True,
                                        name='tmp')
     state_variable = optimizer.add_variable_from_reference(
         model_variable, 'test')
     self.assertEqual(state_variable._shared_name, 'test/tmp')
     self.assertAllClose(self.evaluate(state_variable), tf.zeros([4, 4]))
     # Make sure the variable contains the correct layout info
     self.assertEqual(state_variable.layout, model_variable.layout)
Example #9
 def _build_learning_rate(self, learning_rate):
     if isinstance(learning_rate,
                   learning_rate_schedule.LearningRateSchedule):
         # Create a variable to hold the current learning rate.
         # Note that the init value `learning_rate(self.iterations)` should
         # pick up the correct layout information from self.iterations.
         self._current_learning_rate = dtensor.DVariable(
             learning_rate(self.iterations),
             name='learning_rate',
             dtype=tf.float32)
         return learning_rate
     init_val = tf.constant(learning_rate, dtype=tf.float32)
     if self._mesh:
         init_val = dtensor.copy_to_mesh(
             init_val, dtensor.Layout.replicated(self._mesh, rank=0))
     return dtensor.DVariable(init_val, name='learning_rate')
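A hedged usage sketch of the two branches above: a plain float becomes a replicated rank-0 `learning_rate` DVariable, while a LearningRateSchedule is returned as-is and a separate DVariable tracks its current value. The optimizer, schedule, and `mesh` used here are assumptions (reusing the mesh from the sketch after Example #1):

# Float branch: the value is copied to the mesh as a replicated scalar.
opt_const = optimizers.Adam(learning_rate=0.001, mesh=mesh)

# Schedule branch: the schedule object is kept, and a 'learning_rate'
# DVariable is created from schedule(iterations).
schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001, decay_steps=1000, decay_rate=0.96)
opt_sched = optimizers.Adam(learning_rate=schedule, mesh=mesh)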
Example #10
    def test_conv2d_layer_with_layout(self):
        conv = layers.Conv2D(32,
                             kernel_size=(3, 3),
                             kernel_layout=self.layout_4d,
                             bias_layout=self.layout_1d)
        inputs = np.random.randint(size=[10, 28, 28, 1], low=0, high=4)
        inputs = tf.constant(inputs, dtype=tf.float32)
        d_inputs = dtensor.copy_to_mesh(inputs, self.layout_4d)
        output = conv(d_inputs)
        self.assertIsInstance(conv.kernel, dtensor.DVariable)
        self.assertIsInstance(conv.bias, dtensor.DVariable)
        self.assertEqual(dtensor.fetch_layout(output), self.layout_4d)

        # Make sure the layer produces the same output when no layout is used.
        tf_utils.set_random_seed(1337)
        conv2 = layers.Conv2D(32, kernel_size=(3, 3))
        output_2 = conv2(inputs)
        self.assertAllClose(output, output_2)
Example #11
def _create_dvariable(layout_map, object_path, variable):
  """Create a new variable instead of using the LazyInitVariable.

  We choose to do this because, even though a LazyInitVariable might behave
  like a normal tf.Variable/DVariable, relying on that is not future proof
  against changes to the variable class. It would also fail instance type
  checks in Python, which could break user code that filters variables by
  type.

  Args:
    layout_map: a LayoutMap that maps a variable object path (string) to a
      Layout.
    object_path: string, the object attribute path for the variable.
    variable: the LazyInitVariable that will be replaced by the newly created
      DVariable.

  Returns:
    A new DVariable with the correct layout information.
  """
  # TODO(b/228209108): Revisit this in future and see if we can just reuse the
  # LazyInitVariable rather than creating a new tf.Variable instance.
  layout = layout_map[object_path]
  if layout is None:
    variable_rank = variable.shape.rank
    layout = dtensor.Layout.replicated(
        mesh=layout_map.get_default_mesh(),
        rank=variable_rank)
  init_val = variable._initial_value  # pylint: disable=protected-access
  if callable(init_val):
    with lazy_variable.disable_init_variable_creator():
      init_val = utils.call_with_layout(init_val, layout)
  else:
    # The init value has probably already been created as a tensor; just copy
    # it to the mesh and give it the proper layout.
    init_val = dtensor.copy_to_mesh(init_val, layout)
  # Use the original variable name for the new DVariable; strip the ':0'
  # suffix that TF appends to variable names.
  variable_name = variable.name
  if variable_name.endswith(':0'):
    variable_name = variable_name[:-2]
  new_variable = dtensor.DVariable(init_val,
                                   trainable=variable.trainable,
                                   name=variable_name)
  return new_variable
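To illustrate the fallback at the top of `_create_dvariable`: if the variable's object path matches no entry in the LayoutMap, the lookup returns None and a fully replicated layout of matching rank is used instead. A small sketch, with the mesh and paths as assumptions:

layout_map = layout_map_lib.LayoutMap(mesh=mesh)
layout_map['d1.kernel'] = dtensor.Layout.replicated(mesh, rank=2)

layout_map['d1.kernel']   # -> the registered 2-D layout
layout_map['d9.kernel']   # -> None; _create_dvariable then falls back to
                          #    dtensor.Layout.replicated(default_mesh, rank)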
Example #12
    def test_dense_layer_with_layout(self):
        dense = layers.Dense(10,
                             kernel_layout=self.layout_2d,
                             bias_layout=self.layout_1d)
        inputs = np.random.randint(size=[32, 8], low=0, high=4)
        inputs = tf.constant(inputs, dtype=tf.float32)
        d_inputs = dtensor.copy_to_mesh(
            inputs, dtensor.Layout.replicated(self.mesh, rank=2))

        output = dense(d_inputs)
        self.assertIsInstance(dense.kernel, dtensor.DVariable)
        self.assertIsInstance(dense.bias, dtensor.DVariable)
        expected_layout = dtensor.Layout(
            [dtensor.UNSHARDED, dtensor.UNSHARDED], self.mesh)
        self.assertEqual(dtensor.fetch_layout(output), expected_layout)

        # Make sure the layer produces the same output when no layout is used.
        tf_utils.set_random_seed(1337)
        dense_2 = layers.Dense(10)
        output_2 = dense_2(inputs)
        self.assertAllClose(output, output_2)
Example #13
    def add_variable_from_reference(self,
                                    model_variable,
                                    variable_name,
                                    initial_value=None):
        """Create an optimizer variable from model variable.

        Create an optimizer variable based on the information of model variable.
        For example, in SGD optimizer momemtum, for each model variable, a
        corresponding momemtum variable is created of the same shape and dtype.

        Args:
          model_variable: The corresponding model variable to the optimizer variable
            to be created.
          variable_name: The name prefix of the optimizer variable to be created.
            The create variables name will follow the pattern
            `{variable_name}/{model_variable.name}`, e.g., `momemtum/dense_1`.
          initial_value: The initial value of the optimizer variable, if None, the
            value will be default to 0.

        Returns:
          An optimizer variable.
        """
        if initial_value is None:
            # Use tf.zeros_like, which propagates the layout information from
            # the model weights, if any.
            initial_value = tf.zeros_like(model_variable)
        elif isinstance(initial_value, tf.Tensor):
            initial_value = dtensor.copy_to_mesh(
                initial_value,
                dtensor.Layout.replicated(self._mesh,
                                          rank=initial_value.shape.rank),
            )
        return dtensor.DVariable(
            initial_value=initial_value,
            name=f"{variable_name}/{model_variable._shared_name}",
            dtype=model_variable.dtype,
            trainable=False,
        )
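For context, a sketch of how an optimizer's build() might call the method above to create per-variable momentum slots; the structure mirrors the SGD-style experimental optimizers, but the names here are assumptions:

    def build(self, var_list):
        super().build(var_list)
        self.momentums = []
        for var in var_list:
            # Each slot inherits the model variable's layout via the
            # tf.zeros_like default shown above.
            self.momentums.append(
                self.add_variable_from_reference(var, "momentum"))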