def DISABLED_test_mnist_training_tpu(self):
    """Train the MNIST model batch-sharded over a one-dimensional TPU mesh.

    The mesh has a single "batch" dimension spanning every global TPU
    device. The test passes when the returned training losses are already
    in descending order.
    """
    # TODO(scottzhu): Enable TPU test once the dtensor_test rule is migrated
    # out of learning/brain
    tpu_util.dtensor_initialize_tpu_system()
    num_tpus = dtensor.num_global_devices("TPU")
    mesh = tpu_util.create_tpu_mesh(["batch"], [num_tpus], "tpu_mesh")

    # Needed by keras initializers.
    tf_utils.set_random_seed(1337)

    layout_map = integration_test_utils.get_all_replicated_layout_map(mesh)
    model = integration_test_utils.get_model_with_layout_map(layout_map)

    optimizer = optimizer_lib.Adam(learning_rate=0.001, mesh=mesh)
    optimizer.build(model.trainable_variables)

    train_losses = integration_test_utils.train_mnist_model_batch_sharded(
        model,
        optimizer,
        mesh,
        num_epochs=3,
        steps_per_epoch=100,
        global_batch_size=64,
    )

    # Make sure the losses are decreasing
    descending = sorted(train_losses, reverse=True)
    self.assertEqual(train_losses, descending)
def test_random_value_initializer(self, initializer_cls, init_args):
    """Check seeding behaviour of a random initializer with a layout.

    Args:
        initializer_cls: Initializer class under test.
        init_args: Keyword arguments used to construct the initializer.
    """
    shape = (4, 4)
    layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], self.mesh)
    initializer = initializer_cls(**init_args)

    # Make sure to raise error when keras global seed is not set.
    with self.assertRaisesRegex(ValueError, 'set the global seed'):
        initializer(shape=shape, layout=layout)

    try:
        tf_utils.set_random_seed(1337)
        first_value = initializer(shape=shape, layout=layout)
        self.assertEqual(first_value.shape, shape)
        self.assertEqual(layout, dtensor.fetch_layout(first_value))

        # Make sure when same seed is set again, the new initializer should
        # generate same result
        tf_utils.set_random_seed(1337)
        fresh_initializer = initializer_cls(**init_args)
        second_value = fresh_initializer(shape=shape, layout=layout)
        self.assertAllClose(first_value, second_value)
    finally:
        # Unset the keras global generator so that it doesn't affect other
        # tests that need to verify the existence of global generator.
        backend._SEED_GENERATOR.generator = None
def test_mnist_training_cpu(self):
    """Train the MNIST model batch-sharded over eight logical CPU devices.

    The first physical CPU is configured with eight logical devices, which
    form a mesh with a single "batch" dimension. The test passes when the
    returned training losses are already in descending order.
    """
    num_devices = 8
    physical_cpus = tf.config.list_physical_devices("CPU")
    tf.config.set_logical_device_configuration(
        physical_cpus[0],
        [tf.config.LogicalDeviceConfiguration()] * num_devices,
    )

    device_names = [f"CPU:{index}" for index in range(num_devices)]
    mesh = mesh_util.create_mesh(
        devices=device_names, mesh_dims=[("batch", num_devices)])

    backend.enable_tf_random_generator()
    # Needed by keras initializers.
    tf_utils.set_random_seed(1337)

    layout_map = integration_test_utils.get_all_replicated_layout_map(mesh)
    model = integration_test_utils.get_model_with_layout_map(layout_map)

    optimizer = optimizer_lib.Adam(learning_rate=0.001, mesh=mesh)
    optimizer.build(model.trainable_variables)

    train_losses = integration_test_utils.train_mnist_model_batch_sharded(
        model,
        optimizer,
        mesh,
        num_epochs=3,
        steps_per_epoch=100,
        global_batch_size=64,
    )

    # Make sure the losses are decreasing
    descending = sorted(train_losses, reverse=True)
    self.assertEqual(train_losses, descending)
def test_layer(self, layer_cls, init_args, variable_settings, input_shape,
               input_dtype=np.float32):
    """Compare a layer built with replicated layouts against a plain one.

    Args:
        layer_cls: Layer class under test.
        init_args: Dict of constructor keyword arguments. It is copied
            before the layout arguments are added, so it is not mutated.
        variable_settings: Mapping from a variable's attribute name to the
            rank used for its replicated layout.
        input_shape: Shape of the randomly generated input.
        input_dtype: dtype the random input is cast to.
    """
    args_with_layout = init_args.copy()
    for variable_name, variable_rank in variable_settings.items():
        args_with_layout[variable_name + '_layout'] = (
            dtensor.Layout.replicated(self.mesh, variable_rank))

    layer = layer_cls(**args_with_layout)
    inputs = np.random.randn(*input_shape).astype(input_dtype)
    d_inputs = dtensor.copy_to_mesh(
        inputs, dtensor.Layout.replicated(self.mesh, len(input_shape)))
    d_output = layer(d_inputs)

    # Only the names are needed here, so iterate the keys directly.
    for variable_name in variable_settings:
        self.assertIsInstance(getattr(layer, variable_name), dtensor.DVariable)

    expected_layout = dtensor.Layout.replicated(
        self.mesh, d_output.shape.rank)
    self.assertEqual(dtensor.fetch_layout(d_output), expected_layout)

    # Make sure to produce same output when layout is not used
    tf_utils.set_random_seed(1337)
    layer_2 = layer_cls(**init_args)
    output = layer_2(inputs)
    self.assertAllClose(d_output, output)

    for variable_name in variable_settings:
        self.assertNotIsInstance(
            getattr(layer_2, variable_name), dtensor.DVariable)
def setUp(self):
    """Configure a 2x2 CPU mesh with dimensions X and Y, then set the seed."""
    # Zero-argument super() resolves the enclosing class automatically, so
    # the class name does not have to be repeated here.
    super().setUp()

    global_ids = test_util.create_device_ids_array((2, 2))
    local_device_ids = np.ravel(global_ids).tolist()
    mesh_dict = {
        'CPU': dtensor.Mesh(
            ['X', 'Y'],
            global_ids,
            local_device_ids,
            test_util.create_device_list((2, 2), 'CPU'),
        )
    }
    self.mesh = self.configTestMesh(mesh_dict)
    tf_utils.set_random_seed(1337)
def setUp(self):
    """Configure a 2x2 CPU mesh with dimensions X and Y, then set the seed."""
    super().setUp()

    mesh_shape = (2, 2)
    device_ids = test_util.create_device_ids_array(mesh_shape)
    cpu_mesh = dtensor.Mesh(
        ["X", "Y"],
        device_ids,
        np.ravel(device_ids).tolist(),
        test_util.create_device_list(mesh_shape, "CPU"),
    )
    self.mesh = self.configTestMesh({"CPU": cpu_mesh})

    tf_utils.set_random_seed(1337)
def setUp(self):
    """Set the seed, configure a 2x2 CPU mesh and build replicated layouts.

    Replicated layouts of rank 1 to 4 are stored on the instance as
    ``layout_1d`` .. ``layout_4d``.
    """
    # Zero-argument super() resolves the enclosing class automatically, so
    # the class name does not have to be repeated here.
    super().setUp()

    tf_utils.set_random_seed(1337)

    global_ids = test_util.create_device_ids_array((2, 2))
    local_device_ids = np.ravel(global_ids).tolist()
    mesh_dict = {
        'CPU': dtensor.Mesh(
            ['X', 'Y'],
            global_ids,
            local_device_ids,
            test_util.create_device_list((2, 2), 'CPU'),
        )
    }
    self.mesh = self.configTestMesh(mesh_dict)

    self.layout_4d = dtensor.Layout.replicated(self.mesh, rank=4)
    self.layout_3d = dtensor.Layout.replicated(self.mesh, rank=3)
    self.layout_2d = dtensor.Layout.replicated(self.mesh, rank=2)
    self.layout_1d = dtensor.Layout.replicated(self.mesh, rank=1)
def setUp(self):
    """Enable the TF random generator, seed it, and build mesh and layouts.

    Stores a 2x2 CPU mesh plus rank-1 and rank-2 layouts on the instance,
    each in a replicated variant and a variant batch-sharded on 'X'.
    """
    # Zero-argument super() resolves the enclosing class automatically, so
    # the class name does not have to be repeated here.
    super().setUp()

    backend.enable_tf_random_generator()
    tf_utils.set_random_seed(1337)

    global_ids = test_util.create_device_ids_array((2, 2))
    local_device_ids = np.ravel(global_ids).tolist()
    mesh_dict = {
        'CPU': dtensor.Mesh(
            ['X', 'Y'],
            global_ids,
            local_device_ids,
            test_util.create_device_list((2, 2), 'CPU'),
        )
    }
    self.mesh = self.configTestMesh(mesh_dict)

    self.layout_2d = dtensor.Layout.replicated(self.mesh, rank=2)
    self.layout_1d = dtensor.Layout.replicated(self.mesh, rank=1)

    self.sharded_2d = dtensor.Layout.batch_sharded(self.mesh, 'X', rank=2)
    self.sharded_1d = dtensor.Layout.batch_sharded(self.mesh, 'X', rank=1)
def test_conv2d_layer_with_layout(self):
    """Conv2D with layouts creates DVariables and matches a plain Conv2D."""
    layout_conv = layers.Conv2D(
        32,
        kernel_size=(3, 3),
        kernel_layout=self.layout_4d,
        bias_layout=self.layout_1d,
    )

    raw_inputs = np.random.randint(size=[10, 28, 28, 1], low=0, high=4)
    inputs = tf.constant(raw_inputs, dtype=tf.float32)
    layout_output = layout_conv(dtensor.copy_to_mesh(inputs, self.layout_4d))

    for weight in (layout_conv.kernel, layout_conv.bias):
        self.assertIsInstance(weight, dtensor.DVariable)
    self.assertEqual(dtensor.fetch_layout(layout_output), self.layout_4d)

    # Make sure to produce same output when layout is not used
    tf_utils.set_random_seed(1337)
    plain_conv = layers.Conv2D(32, kernel_size=(3, 3))
    plain_output = plain_conv(inputs)
    self.assertAllClose(layout_output, plain_output)
def test_seeds(self):
    """Two runs under the same seed must give matching predictions."""
    if not tf.__internal__.tf2.enabled():
        self.skipTest('set_random_seed() is only expected to work in tf2.')

    def get_model_output():
        # Keep this order: model construction, input sampling and dataset
        # shuffling all happen after the seed has been set.
        model = keras.Sequential([
            keras.layers.Dense(10),
            keras.layers.Dropout(0.5),
            keras.layers.Dense(10),
        ])
        samples = np.random.random((32, 10)).astype('float32')
        dataset = tf.data.Dataset.from_tensor_slices(samples)
        dataset = dataset.shuffle(32).batch(16)
        return model.predict(dataset)

    outputs = []
    for _ in range(2):
        tf_utils.set_random_seed(42)
        outputs.append(get_model_output())

    first_run, second_run = outputs
    self.assertAllClose(first_run, second_run, atol=1e-6)
def test_dense_layer_with_layout(self):
    """Dense with layouts creates DVariables and matches a plain Dense."""
    layout_dense = layers.Dense(
        10, kernel_layout=self.layout_2d, bias_layout=self.layout_1d)

    raw_inputs = np.random.randint(size=[32, 8], low=0, high=4)
    inputs = tf.constant(raw_inputs, dtype=tf.float32)
    input_layout = dtensor.Layout.replicated(self.mesh, rank=2)
    layout_output = layout_dense(dtensor.copy_to_mesh(inputs, input_layout))

    for weight in (layout_dense.kernel, layout_dense.bias):
        self.assertIsInstance(weight, dtensor.DVariable)

    unsharded_2d = dtensor.Layout(
        [dtensor.UNSHARDED, dtensor.UNSHARDED], self.mesh)
    self.assertEqual(dtensor.fetch_layout(layout_output), unsharded_2d)

    # Make sure to produce same output when layout is not used
    tf_utils.set_random_seed(1337)
    plain_dense = layers.Dense(10)
    plain_output = plain_dense(inputs)
    self.assertAllClose(layout_output, plain_output)
def test_token_classification(self):
    """Fit a small Conv1D token classifier on random ragged sequences.

    Builds 100 random integer sequences with lengths drawn from 4..16 and
    one label in {0, 1, 2} per token, batches them in groups of 10 and
    densifies each batch. After 10 epochs the test checks the final
    validation accuracy, its agreement with `evaluate`, and the type and
    outer shape of the predictions.
    """
    batch_size = 10

    def densify(x, y):
        return x.to_tensor(), y.to_tensor()

    tf_utils.set_random_seed(1337)
    data = tf.ragged.stack([
        np.random.randint(low=0, high=16, size=random.randint(4, 16))
        for _ in range(100)
    ])
    labels = tf.ragged.stack(
        [np.random.randint(low=0, high=3, size=len(arr)) for arr in data])

    features_dataset = tf.data.Dataset.from_tensor_slices(data)
    labels_dataset = tf.data.Dataset.from_tensor_slices(labels)
    dataset = tf.data.Dataset.zip((features_dataset, labels_dataset))
    dataset = dataset.batch(batch_size=batch_size)
    dataset = dataset.map(densify)  # Pads with 0 values by default

    layers = [
        keras.layers.Embedding(16, 4),
        keras.layers.Conv1D(4, 5, padding='same', activation='relu'),
        keras.layers.Conv1D(8, 5, padding='same'),
        keras.layers.BatchNormalization(),
        keras.layers.Conv1D(3, 5, padding='same', activation='softmax'),
    ]
    model = test_utils.get_model_from_layers(layers, input_shape=(None,))
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['acc'],
        run_eagerly=test_utils.should_run_eagerly(),
    )

    history = model.fit(
        dataset, epochs=10, validation_data=dataset, verbose=2)
    self.assertGreater(history.history['val_acc'][-1], 0.5)

    _, val_acc = model.evaluate(dataset)
    self.assertAlmostEqual(history.history['val_acc'][-1], val_acc)

    predictions = model.predict(dataset)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)), which only reports "False is not true".
    self.assertIsInstance(predictions, tf.RaggedTensor)
    self.assertEqual(predictions.shape[0], len(dataset) * batch_size)
    self.assertEqual(predictions.shape[-1], 3)