def _create_iteration_variable(self):
  init_val = tf.constant(0, dtype=tf.int64)
  if self._mesh:
    init_val = dtensor.copy_to_mesh(
        init_val, dtensor.Layout.replicated(self._mesh, rank=0))
  with tf.init_scope():
    # Lift the variable creation to init scope to avoid environment issues.
    self._iterations = dtensor.DVariable(init_val, name='iteration')
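For context, a minimal sketch of the replication this method relies on. The 2-device CPU mesh and its `batch` dimension are assumptions made for illustration, not part of the optimizer code:

import tensorflow as tf
from tensorflow.experimental import dtensor

# Hypothetical mesh; real code would size this to the available hardware.
mesh = dtensor.create_mesh([('batch', 2)], devices=['CPU:0', 'CPU:1'])

# A rank-0 replicated layout places one identical copy of the scalar counter
# on every device in the mesh.
init_val = dtensor.copy_to_mesh(
    tf.constant(0, dtype=tf.int64), dtensor.Layout.replicated(mesh, rank=0))
iterations = dtensor.DVariable(init_val, name='iteration')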
def _build_learning_rate(self, learning_rate):
  if isinstance(learning_rate, learning_rate_schedule.LearningRateSchedule):
    # Create a variable to hold the current learning rate.
    # Note that the init value `learning_rate(self.iterations)` should have
    # the correct layout information from self.iterations.
    self._current_learning_rate = dtensor.DVariable(
        learning_rate(self.iterations),
        name='learning_rate',
        dtype=tf.float32)
    return learning_rate
  init_val = tf.constant(learning_rate, dtype=tf.float32)
  if self._mesh:
    init_val = dtensor.copy_to_mesh(
        init_val, dtensor.Layout.replicated(self._mesh, rank=0))
  return dtensor.DVariable(init_val, name='learning_rate')
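A hedged usage sketch of the two paths above, reusing `mesh` from the previous sketch. The dtensor-aware `SGD` and its `mesh` keyword are assumptions here, so treat the optimizer construction as illustrative rather than a confirmed API:

import tensorflow as tf

# Path 1: a plain float, which becomes a replicated DVariable on the mesh.
sgd = optimizers.SGD(learning_rate=0.1, mesh=mesh)  # hypothetical signature

# Path 2: a LearningRateSchedule is returned as-is; the current value,
# evaluated at `self.iterations`, is tracked in `_current_learning_rate`.
schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.1, decay_steps=1000, decay_rate=0.9)
sgd_with_schedule = optimizers.SGD(learning_rate=schedule, mesh=mesh)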
def test_conv2d_layer_with_layout(self):
  conv = layers.Conv2D(
      32,
      kernel_size=(3, 3),
      kernel_layout=self.layout_4d,
      bias_layout=self.layout_1d)
  inputs = np.random.randint(size=[10, 28, 28, 1], low=0, high=4)
  inputs = tf.constant(inputs, dtype=tf.float32)
  d_inputs = dtensor.copy_to_mesh(inputs, self.layout_4d)
  output = conv(d_inputs)
  self.assertIsInstance(conv.kernel, dtensor.DVariable)
  self.assertIsInstance(conv.bias, dtensor.DVariable)
  self.assertEqual(dtensor.fetch_layout(output), self.layout_4d)

  # Make sure the layer produces the same output when no layout is used.
  tf_utils.set_random_seed(1337)
  conv2 = layers.Conv2D(32, kernel_size=(3, 3))
  output_2 = conv2(inputs)
  self.assertAllClose(output, output_2)
def test_dense_layer_with_layout(self):
  dense = layers.Dense(
      10, kernel_layout=self.layout_2d, bias_layout=self.layout_1d)
  inputs = np.random.randint(size=[32, 8], low=0, high=4)
  inputs = tf.constant(inputs, dtype=tf.float32)
  d_inputs = dtensor.copy_to_mesh(
      inputs, dtensor.Layout.replicated(self.mesh, rank=2))
  output = dense(d_inputs)
  self.assertIsInstance(dense.kernel, dtensor.DVariable)
  self.assertIsInstance(dense.bias, dtensor.DVariable)
  expected_layout = dtensor.Layout(
      [dtensor.UNSHARDED, dtensor.UNSHARDED], self.mesh)
  self.assertEqual(dtensor.fetch_layout(output), expected_layout)

  # Make sure the layer produces the same output when no layout is used.
  tf_utils.set_random_seed(1337)
  dense_2 = layers.Dense(10)
  output_2 = dense_2(inputs)
  self.assertAllClose(output, output_2)
def _create_dvariable(layout_map, object_path, variable):
  """Create a new variable instead of reusing the LazyInitVariable.

  We choose to do this because, even though the LazyInitVariable might behave
  like a normal tf.Variable/DVariable, it is not future proof against new
  changes to the variable class. It would also fail instance type checks in
  Python, which could affect user code that filters variables by type.

  Args:
    layout_map: a LayoutMap which maps variable_object_path (string) ->
      Layout.
    object_path: string, the object attribute path for the variable.
    variable: LazyInitVariable which will be replaced by the newly created
      tf.Variable.

  Returns:
    A new tf.Variable with the correct layout information.
  """
  # TODO(scottzhu): Revisit this in the future and see if we can just reuse
  # the LazyInitVariable rather than creating a new tf.Variable instance.
  layout = layout_map[object_path]
  if layout is None:
    variable_rank = variable.shape.rank
    layout = dtensor.Layout.replicated(
        mesh=layout_map.get_default_mesh(), rank=variable_rank)
  init_val = variable._initial_value  # pylint: disable=protected-access
  if callable(init_val):
    init_val = utils.call_with_layout(init_val, layout)
  else:
    # The init value is probably already created as a tensor; we just copy it
    # to the mesh and give it the proper layout.
    init_val = dtensor.copy_to_mesh(init_val, layout)
  new_variable = dtensor.DVariable(
      init_val, trainable=variable.trainable, name=variable.name)
  return new_variable
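To show where `layout_map[object_path]` comes from, a small sketch of populating a LayoutMap; the regex key and the `model` mesh dimension are assumptions for illustration:

from keras.dtensor import layout_map as layout_map_lib

# Keys are regexes matched against object attribute paths such as
# 'dense/kernel'; variables with no matching key get the replicated fallback
# shown in _create_dvariable above.
layout_map = layout_map_lib.LayoutMap(mesh=mesh)
layout_map['dense.*kernel'] = dtensor.Layout(
    [dtensor.UNSHARDED, 'model'], mesh)  # assumes a 'model' mesh dimension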
def add_variable_from_reference(self,
                                model_variable,
                                variable_name,
                                initial_value=None):
  """Create an optimizer variable from a model variable.

  Create an optimizer variable based on the information of the model
  variable. For example, with SGD momentum, a corresponding momentum variable
  of the same shape and dtype is created for each model variable.

  Args:
    model_variable: The model variable to which the optimizer variable to be
      created corresponds.
    variable_name: The name prefix of the optimizer variable to be created.
      The created variable's name will follow the pattern
      `{variable_name}/{model_variable.name}`, e.g., `momentum/dense_1`.
    initial_value: The initial value of the optimizer variable. If None, the
      value defaults to 0.

  Returns:
    An optimizer variable.
  """
  if initial_value is None:
    # Use tf.zeros_like, which will propagate the layout information from the
    # model weights, if any.
    initial_value = tf.zeros_like(model_variable)
  elif isinstance(initial_value, tf.Tensor):
    initial_value = dtensor.copy_to_mesh(
        initial_value,
        dtensor.Layout.replicated(self._mesh, rank=initial_value.shape.rank))
  return dtensor.DVariable(
      initial_value=initial_value,
      name=f'{variable_name}/{model_variable._shared_name}',
      dtype=model_variable.dtype,
      trainable=False)
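A sketch of how an optimizer's `build` step might call this method, creating one momentum slot per model variable; the surrounding `build(var_list)` context is hypothetical:

def build(self, var_list):
  """Hypothetical build step for an SGD-with-momentum optimizer."""
  self.momentums = []
  for var in var_list:
    # One slot per model variable, named e.g. 'momentum/dense_1'.
    self.momentums.append(
        self.add_variable_from_reference(
            model_variable=var, variable_name='momentum'))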