def test_init_subclass_model_variable_with_layout(self):
    """Subclass-model weights created in the scope pick up mapped layouts."""
    lmap = layout_map_lib.LayoutMap(mesh=self.mesh)
    lmap['d1.kernel'] = self.layout_2d
    lmap['d1.bias'] = self.layout_1d
    lmap['d2.kernel'] = self.layout_2d
    lmap['d2.bias'] = self.layout_1d

    with layout_map_lib.layout_map_scope(lmap):
        model = SubclassModel(name='model')

    # Init the model with an eager tensor; the weights must carry the mapped
    # layouts and the forward pass must produce the expected result.
    batch = tf.zeros((10, 10), layout=self.layout_2d)
    result = model(batch)
    self.assertAllClose(result, tf.zeros((10, 1000)))

    d1 = model.d1
    d2 = model.d2
    for dense_layer in (d1, d2):
        self.assertEqual(dense_layer.kernel.layout, self.layout_2d)
        self.assertEqual(dense_layer.bias.layout, self.layout_1d)
        # Cached attributes such as layer._trainable_weights must have been
        # repopulated to point at the layout-aware variables.
        self.assertIs(dense_layer.kernel, dense_layer._trainable_weights[0])
        self.assertIs(dense_layer.bias, dense_layer._trainable_weights[1])

    result = model(tf.zeros((10, 10), layout=self.layout_2d), training=True)
    self.assertAllClose(result, tf.zeros((10, 1000), layout=self.layout_2d))
def test_weight_regularization(self):
    """Regularization losses are tracked for weights built in the scope."""
    layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
    with layout_map_lib.layout_map_scope(layout_map):
        model = tf.keras.Sequential([
            layers.Dense(
                20,
                name="d1",
                input_shape=(10, ),
                kernel_initializer="ones",
                kernel_regularizer="l2",
            ),
            layers.Dropout(0.1),
            layers.Dense(
                30,
                name="d2",
                kernel_initializer="ones",
                kernel_regularizer="l2",
            ),
        ])

    self.assertLen(model.losses, 2)
    # d1 kernel is an all-ones [10, 20] tensor; the string "l2" regularizer
    # uses a 0.01 factor, so the loss is 10 * 20 * 0.01 = 2.0.
    self.assertAllClose(model.losses[0], 2.0)
    # d2 kernel is an all-ones [20, 30] tensor: 20 * 30 * 0.01 = 6.0.
    self.assertAllClose(model.losses[1], 6.0)
def test_dvariable_name(self):
    """DVariables keep the normal `<layer>/<weight>:0` naming scheme."""
    layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
    with layout_map_lib.layout_map_scope(layout_map):
        model = tf.keras.Sequential([
            layers.Dense(20, name='d1', input_shape=(10, )),
            layers.Dropout(0.1),
            layers.Dense(30, name='d2'),
        ])

    self.assertLen(model.layers, 3)
    first_dense = model.layers[0]
    self.assertEqual(first_dense.kernel.name, 'd1/kernel:0')
    self.assertEqual(first_dense.bias.name, 'd1/bias:0')
def get_model_with_layout_map(layout_map):
    """Builds a Sequential CNN model to recognize MNIST digits.

    Args:
      layout_map: dict of string name -> Layout, for weights creation.

    Returns:
      a CNN Keras model used for MNIST
    """
    with layout_map_lib.layout_map_scope(layout_map):
        # Weights created while the scope is active pick up layouts from
        # `layout_map`. Two conv blocks, then a small dense classifier head.
        mnist_layers = [
            layers.Conv2D(
                32,
                name="conv2d_1",
                kernel_size=(3, 3),
                activation="relu",
                input_shape=(28, 28, 1),  # channel last gray scale input
            ),
            layers.Conv2D(
                64,
                name="conv2d_2",
                kernel_size=(3, 3),
                activation="relu",
            ),
            layers.MaxPooling2D(pool_size=(2, 2)),
            layers.Dropout(0.25),
            layers.Flatten(),
            layers.Dense(
                128,
                name="dense_1",
                activation="relu",
            ),
            layers.Dropout(0.5),
            layers.Dense(
                NUM_CLASS,
                name="dense_2",
                activation="softmax",
            ),
        ]
        model = models.Sequential()
        for mnist_layer in mnist_layers:
            model.add(mnist_layer)
    return model
def test_init_model_with_empty_layout_map(self):
    """With no entries in the map, weights fall back to the default layout."""
    # Create an empty layout map, which means all the weights just default
    # to all replicated.
    layout_map = layout_map_lib.LayoutMap(mesh=self.mesh)
    with layout_map_lib.layout_map_scope(layout_map):
        model = tf.keras.Sequential([
            layers.Dense(20, name='d1', input_shape=(10, )),
            layers.Dropout(0.1),
            layers.Dense(30, name='d2'),
        ])

    self.assertLen(model.layers, 3)
    d1 = model.layers[0]
    d2 = model.layers[2]

    # NOTE(review): these assertions only express "default to replicated" if
    # self.layout_2d / self.layout_1d are the fully-replicated layouts from
    # setUp — confirm against the fixture definitions.
    self.assertEqual(d1.kernel.layout, self.layout_2d)
    self.assertEqual(d1.bias.layout, self.layout_1d)
    self.assertEqual(d2.kernel.layout, self.layout_2d)
    self.assertEqual(d2.bias.layout, self.layout_1d)
def test_init_functional_model_variable_with_layout(self):
    """Functional-model weights pick up layouts keyed by layer name."""
    # Functional models address weights as `<layer name>.<attribute>`. Layer
    # names are unique within the model; when a layer has no explicit name,
    # Keras derives a unique one from the layer class.
    lmap = layout_map_lib.LayoutMap(mesh=self.mesh)
    lmap['d1.kernel'] = self.layout_2d
    lmap['d1.bias'] = self.layout_1d
    lmap['d2.kernel'] = self.layout_2d
    lmap['d2.bias'] = self.layout_1d

    with layout_map_lib.layout_map_scope(lmap):
        inputs = tf.keras.Input((10, ), batch_size=10)
        x = layers.Dense(20, name='d1')(inputs)
        x = layers.Dropout(0.1)(x)
        output = layers.Dense(30, name='d2')(x)
        model = tf.keras.Model(inputs, output)

    # model.layers includes the input layer as well.
    self.assertLen(model.layers, 4)
    d1 = model.layers[1]
    d2 = model.layers[3]
    for dense_layer in (d1, d2):
        self.assertEqual(dense_layer.kernel.layout, self.layout_2d)
        self.assertEqual(dense_layer.bias.layout, self.layout_1d)
        # Cached attributes like layer._trainable_weights must have been
        # repopulated to point at the layout-aware variables.
        self.assertIs(dense_layer.kernel, dense_layer._trainable_weights[0])
        self.assertIs(dense_layer.bias, dense_layer._trainable_weights[1])

    batch = dtensor.copy_to_mesh(tf.zeros((10, 10)), layout=self.layout_2d)
    expected_result = dtensor.copy_to_mesh(
        tf.zeros((10, 30)), layout=self.layout_2d)
    self.assertAllClose(model(batch, training=True), expected_result)
def test_init_sequential_model_variable_with_layout(self):
    """Sequential-model weights pick up layouts keyed by layer name."""
    # Sequential models also address weights as `<layer name>.<attribute>`;
    # names are unique within the model, and Keras generates one from the
    # layer class when none is supplied.
    lmap = layout_map_lib.LayoutMap(mesh=self.mesh)
    lmap["d1.kernel"] = self.layout_2d
    lmap["d1.bias"] = self.layout_1d
    lmap["d2.kernel"] = self.layout_2d
    lmap["d2.bias"] = self.layout_1d

    with layout_map_lib.layout_map_scope(lmap):
        model = tf.keras.Sequential([
            layers.Dense(20, name="d1", input_shape=(10, )),
            layers.Dropout(0.1),
            layers.Dense(30, name="d2"),
        ])

    self.assertLen(model.layers, 3)
    d1 = model.layers[0]
    d2 = model.layers[2]
    for dense_layer in (d1, d2):
        self.assertEqual(dense_layer.kernel.layout, self.layout_2d)
        self.assertEqual(dense_layer.bias.layout, self.layout_1d)
        # Cached attributes like layer._trainable_weights must have been
        # repopulated to point at the layout-aware variables.
        self.assertIs(dense_layer.kernel, dense_layer._trainable_weights[0])
        self.assertIs(dense_layer.bias, dense_layer._trainable_weights[1])

    batch = dtensor.copy_to_mesh(tf.zeros((10, 10)), layout=self.layout_2d)
    expected_result = dtensor.copy_to_mesh(
        tf.zeros((10, 30)), layout=self.layout_2d)
    self.assertAllClose(model(batch, training=True), expected_result)