def test_previously_unexpected_cluster_spec(self):
  """Builds a RunConfig under a patched TF_CONFIG with a mirrored strategy.

  There are no explicit assertions: the test passes as long as constructing
  the RunConfig does not raise.
  """
  patched_environ = {"TF_CONFIG": json.dumps(TF_CONFIG_WITHOUT_TASK)}
  with test.mock.patch.dict("os.environ", patched_environ):
    strategy = mirrored_strategy.MirroredStrategy(num_gpus=2)
    distribute_config = DistributeConfig(train_distribute=strategy)
    run_config_lib.RunConfig(experimental_distribute=distribute_config)
def test_calling_with_unsupported_predefined_callbacks(self):
  """`fit` raises ValueError for each callback rejected under a strategy."""

  def constant_lr(_):
    return 0.001

  # Pairs of (expected error regex, zero-argument callback factory). The
  # factories are invoked inside the assertRaises context, so each callback is
  # constructed at the same point as the `fit` call that uses it.
  unsupported_cases = (
      ('LearningRateScheduler callback is not '
       'supported with DistributionStrategy.',
       lambda: keras.callbacks.LearningRateScheduler(constant_lr)),
      ('ReduceLROnPlateau callback is not '
       'supported with DistributionStrategy.',
       lambda: keras.callbacks.ReduceLROnPlateau()),
      ('histogram_freq in the TensorBoard callback '
       'is not supported when using '
       'DistributionStrategy.',
       lambda: keras.callbacks.TensorBoard(histogram_freq=10)),
  )

  with self.cached_session():
    keras_model = get_model()
    sgd = gradient_descent.GradientDescentOptimizer(0.001)
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:1', '/device:GPU:0'])
    keras_model.compile(sgd, 'mse', metrics=['mae'], distribute=strategy)
    train_data = get_dataset(strategy)

    for expected_regex, make_callback in unsupported_cases:
      with self.assertRaisesRegexp(ValueError, expected_regex):
        keras_model.fit(train_data, epochs=1, steps_per_epoch=2, verbose=0,
                        callbacks=[make_callback()])
def test_validating_dataset_input_tensors_with_dtype_mismatch(self):
  """Validation rejects distributed inputs whose per-device dtypes differ."""
  cpu = '/device:CPU:0'
  gpu = '/device:GPU:0'
  # Device and dtype details are left out of the expected message because the
  # order of the devices and their corresponding input tensor dtypes is not
  # deterministic over different runs.
  expected_regex = ('Input tensor dtypes do not match for '
                    'distributed tensor inputs '
                    'DistributedValues:.+')

  with self.cached_session():
    strategy = mirrored_strategy.MirroredStrategy([gpu, cpu])
    int32_tensor = constant_op.constant(
        [1, 2], shape=(1, 2), dtype=dtypes.int32)
    float64_tensor = constant_op.constant(
        [1, 2], shape=(1, 2), dtype=dtypes.float64)
    # `x` mixes int32 and float64 across devices; `y` is int32 everywhere.
    x = values.DistributedValues({cpu: int32_tensor, gpu: float64_tensor})
    y = values.DistributedValues({cpu: int32_tensor, gpu: int32_tensor})

    with strategy.scope(), self.assertRaisesRegexp(ValueError,
                                                   expected_regex):
      distributed_training_utils.validate_distributed_dataset_inputs(
          strategy, x, y)
def testAssignMirroredVarTowerContextWithSum(self):
  """Tower-context assign to a "sum"-aggregated mirrored variable raises."""
  # A value that is not per-device must not be reduced with the "sum"
  # aggregation type.
  self._skip_eager_if_gpus_less_than(1)

  def make_variable():
    return variable_scope.variable(1.0, name="foo")

  strategy = mirrored_strategy.MirroredStrategy(
      ["/device:GPU:0", "/device:CPU:0"])
  expected_error = ("A non PerDevice value cannot be reduced with the given "
                    "method_string.")

  with strategy.scope():
    mirrored = strategy.call_for_each_tower(
        make_variable, run_concurrently=False)
    # TODO(anjalisridhar): Use API introduced in cr/201463945 to set the
    # aggregation method.
    mirrored._aggregation_method = "sum"
    self.assertIsInstance(mirrored, values.MirroredVariable)
    self.evaluate(variables.global_variables_initializer())

    def assign_in_tower():
      return mirrored.assign(5.0)

    with self.assertRaisesRegexp(ValueError, expected_error):
      self.evaluate(
          strategy.unwrap(strategy.call_for_each_tower(assign_in_tower)))
def test_calculating_batch_size(self):
  """Checks the per-device batch size derived from 64 samples and 3 steps."""
  with self.cached_session():
    # 64 is the number of input samples.
    num_samples = 64
    features = np.zeros((num_samples, 3), dtype=np.float32)
    labels = np.zeros((num_samples, 4), dtype=np.float32)

    keras_model = get_model()
    sgd = gradient_descent.GradientDescentOptimizer(0.001)
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:0', '/device:CPU:0'])
    strategy._require_static_shapes = True
    keras_model.compile(sgd, 'mse', distribute=strategy)

    standardized = keras_model._distribution_standardize_user_data(
        features,
        labels,
        batch_size=None,
        check_steps=True,
        steps_name='steps',
        steps=3)

    # The global batch size(21) across all replicas is the ratio of the input
    # samples(64) to the steps(3).
    # The batch size(10) per device is the ratio of the global batch size(21)
    # to the number of replicas(2).
    # The global batch size and batch size are rounded integer values.
    per_device_batch = distributed_training_utils.get_batch_dimension(
        standardized._iterator)
    self.assertEqual(10, per_device_batch)
def testWithLayers(self):
  """Dense layers built per tower yield uniquely named MirroredVariables.

  Three `Dense(1)` layers are created inside the "common" variable scope, with
  a `merge_call` between the second and third. The kernels and biases returned
  from `call_for_each_tower` must be `MirroredVariable`s named
  `common/dense{,_1,_2}/{kernel,bias}:0`.
  """
  self._skip_eager_if_gpus_less_than(1)

  def model_fn(features):
    with variable_scope.variable_scope("common"):
      layer1 = core.Dense(1)
      layer1(features)
      layer2 = core.Dense(1)
      layer2(features)
      # This will pause the current thread, and execute the other thread.
      distribute_lib.get_tower_context().merge_call(lambda _: _)
      layer3 = core.Dense(1)
      layer3(features)
      return [(layer1.kernel, layer1.bias),
              (layer2.kernel, layer2.bias),
              (layer3.kernel, layer3.bias)]

  dist = mirrored_strategy.MirroredStrategy(
      ["/device:GPU:0", "/device:CPU:0"])
  features = dist.distribute_dataset(
      lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10)
  ).make_one_shot_iterator().get_next()

  with dist.scope():
    result = dist.call_for_each_tower(
        model_fn, features, run_concurrently=False)
    # One name suffix per layer, in creation order.
    suffixes = ["", "_1", "_2"]
    for (kernel, bias), suffix in zip(result, suffixes):
      self.assertIsInstance(kernel, values.MirroredVariable)
      self.assertEqual("common/dense" + suffix + "/kernel:0", kernel.name)
      self.assertIsInstance(bias, values.MirroredVariable)
      self.assertEqual("common/dense" + suffix + "/bias:0", bias.name)
def testNameScopeWithGetVariable(self):
  """`get_variable` names are unaffected by enclosing `name_scope`s.

  Variables "a", "b" and "c" are created under name scopes ("main", "foo")
  in cross-tower context, in tower context and inside a `merge_call`. The
  per-device components must be named `<var>:0` and `<var>/replica_1:0`.
  """

  def in_cross_tower(_):
    c = variable_scope.get_variable("c", [1])
    return c

  def model_fn():
    b = variable_scope.get_variable("b", [1])
    with ops.name_scope("foo"):
      c = distribute_lib.get_tower_context().merge_call(in_cross_tower)
    return b, c

  dist = mirrored_strategy.MirroredStrategy(
      ["/device:GPU:0", "/device:CPU:0"])

  with context.graph_mode(), dist.scope():
    with ops.name_scope("main"):
      a = variable_scope.get_variable("a", [1])
      result = dist.call_for_each_tower(model_fn, run_concurrently=False)
    result_b = result[0]
    result_c = result[1]
    self.assertIsInstance(result_b, values.DistributedValues)
    self.assertIsInstance(result_c, values.DistributedValues)
    # Unwrap each distributed value into its two per-device components.
    a0, a1 = dist.unwrap(a)
    b0, b1 = dist.unwrap(result_b)
    c0, c1 = dist.unwrap(result_c)
    self.assertEqual("a:0", a0.name)
    self.assertEqual("a/replica_1:0", a1.name)
    self.assertEqual("b:0", b0.name)
    self.assertEqual("b/replica_1:0", b1.name)
    self.assertEqual("c:0", c0.name)
    self.assertEqual("c/replica_1:0", c1.name)
def testTowerLocalVariable(self):
  """Exercises "sum" and "mean" tower-local variables end to end.

  Verifies that every tower sees the same wrapping `TowerLocalVariable`
  instance, that reads inside a device scope return that device's own value,
  and that `fetch()` returns the reduction (sum or mean) across all towers.
  """
  self._skip_eager_if_gpus_less_than(1)

  # Wrapper objects observed by each tower, keyed by device id.
  all_v_sum = {}
  all_v_mean = {}

  def model_fn(device_id):
    tower_context = distribute_lib.get_tower_context()
    with tower_context.tower_local_var_scope("sum"):
      v_sum = variable_scope.variable(1.0)
    with tower_context.tower_local_var_scope("mean"):
      v_mean = variable_scope.variable(4.0)
    self.assertIsInstance(v_sum, values.TowerLocalVariable)
    self.assertIsInstance(v_mean, values.TowerLocalVariable)
    updates = [
        v_sum.assign_add(2.0 + device_id),
        v_mean.assign(6.0 * device_id),
    ]
    all_v_sum[device_id] = v_sum
    all_v_mean[device_id] = v_mean
    return updates, v_sum, v_mean

  dist = mirrored_strategy.MirroredStrategy(
      ["/device:GPU:0", "/device:CPU:0"])

  with dist.scope():
    # Create "sum" and "mean" versions of TowerLocalVariables.
    ret_ops, ret_v_sum, ret_v_mean = dist.call_for_each_tower(
        model_fn, dist.worker_device_index, run_concurrently=False)
    # Should see the same wrapping instance in all towers.
    self.assertIs(all_v_sum[0], ret_v_sum)
    self.assertIs(all_v_mean[0], ret_v_mean)
    for i in range(1, dist.num_towers):
      # Compare tower 0 against tower `i`, not a hard-coded tower 1, so every
      # tower is actually checked.
      self.assertIs(all_v_sum[0], all_v_sum[i])
      self.assertIs(all_v_mean[0], all_v_mean[i])

    # Apply updates
    self.evaluate(variables.global_variables_initializer())
    self.evaluate([y for x in ret_ops for y in dist.unwrap(x)])
    expected_sum = 0.0
    expected_mean = 0.0
    for i, d in enumerate(dist.worker_devices):
      # Test access within a device scope, should see different values.
      with ops.device(d):
        v_sum_value = self.evaluate(ret_v_sum.read_value())
        v_mean_value = self.evaluate(ret_v_mean.read_value())
      # v_sum starts at 1.0 and has (2.0 + i) added to it.
      expected = i + 3.0
      self.assertEqual(expected, v_sum_value)
      expected_sum += expected
      # v_mean is overwritten with 6.0 * i.
      expected = i * 6.0
      self.assertEqual(expected, v_mean_value)
      expected_mean += expected

    # fetch() should return the value you get by applying the
    # reduction across all towers.
    self.assertEqual(expected_sum, self.evaluate(dist.fetch(ret_v_sum)))
    expected_mean /= len(dist.worker_devices)
    self.assertEqual(expected_mean, self.evaluate(dist.fetch(ret_v_mean)))
def testCreatorStacksAreThreadLocal(self):
  """Variable creator scopes entered inside a tower stay local to that tower.

  Each tower pushes its own creator that appends ":thread_<device_id>" to
  whatever the next creator returns; the outer creator returns
  "main_thread". After a `merge_call` switches threads, each tower must
  still have produced its own suffix.
  """
  devices = ["/device:CPU:0", "/device:GPU:0"]
  dist = mirrored_strategy.MirroredStrategy(devices)

  def model_fn(device_id):
    # A unittest assertion (rather than a bare `assert`) keeps this check
    # active even when Python runs with -O.
    self.assertIsInstance(device_id, int)

    def thread_creator_fn(next_creator, *args, **kwargs):
      return next_creator(*args, **kwargs) + ":thread_" + str(device_id)

    with variable_scope.variable_creator_scope(thread_creator_fn):
      # Create a variable in this scope.
      v = variable_scope.variable(1.0)

      # This will pause the current thread, and execute the other thread.
      distribute_lib.get_tower_context().merge_call(lambda _: _)
    return v

  def main_thread_creator(next_creator, *args, **kwargs):
    # We are not using the underlying next_creator for test purposes.
    del next_creator, args, kwargs
    return "main_thread"

  with context.graph_mode(), \
      dist.scope(), \
      variable_scope.variable_creator_scope(main_thread_creator):
    result = dist.call_for_each_tower(model_fn, dist.worker_device_index)
    result = dist.unwrap(result)
    expected = ["main_thread:thread_0", "main_thread:thread_1"]
    self.assertEqual(expected, result)
def testAssignSubMirroredVarTowerContext(self):
  """`assign_sub` in tower context on a MEAN-aggregated mirrored variable.

  The variable starts at 5.0 and each tower subtracts its own tower id; the
  test expects the variable to read 4.5 afterwards.
  """
  self._skip_eager_if_gpus_less_than(1)

  def var_fn():
    return variable_scope.variable(
        5.0, name="foo", aggregation=variable_scope.VariableAggregation.MEAN)

  dist = mirrored_strategy.MirroredStrategy(
      ["/device:GPU:0", "/device:CPU:0"])

  with dist.scope():
    mirrored_var = dist.call_for_each_tower(var_fn, run_concurrently=False)
    self.assertIsInstance(mirrored_var, values.MirroredVariable)
    self.evaluate(variables.global_variables_initializer())
    self.assertEqual(5.0, self.evaluate(mirrored_var))

    def model_fn():
      # Subtract this tower's id, cast to the variable's dtype.
      value = math_ops.cast(
          distribute_lib.get_tower_context().tower_id, mirrored_var.dtype)
      return mirrored_var.assign_sub(value)

    self.evaluate(
        dist.unwrap(
            dist.call_for_each_tower(model_fn, run_concurrently=False)))
    self.assertEqual(4.5, self.evaluate(mirrored_var))
def test_calling_model_with_numpy_arrays(self):
  """fit/evaluate/predict accept numpy arrays on a strategy-compiled model."""
  with self.cached_session():
    input_layer = keras.layers.Input(shape=(3,), name='input')
    output_layer = keras.layers.Dense(4, name='dense')(input_layer)
    keras_model = keras.Model(input_layer, output_layer)

    sgd = gradient_descent.GradientDescentOptimizer(0.001)
    metric_list = ['mae', keras.metrics.CategoricalAccuracy()]
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:1', '/device:GPU:0'])
    keras_model.compile(sgd, 'mse', metrics=metric_list, distribute=strategy)

    features = np.zeros((64, 3), dtype=np.float32)
    labels = np.zeros((64, 4), dtype=np.float32)

    # Call fit with validation data
    keras_model.fit(features, labels, epochs=1, batch_size=2, verbose=0,
                    validation_data=(features, labels))

    # TODO(anjalisridhar): We need tests for when the batch size and steps are
    # smaller and results in a 0 batch_size and steps value.
    # Each entry is one call variant: defaults, with steps, with batch_size.
    call_variants = ({}, {'steps': 2}, {'batch_size': 8})
    for extra_kwargs in call_variants:
      keras_model.evaluate(features, labels, **extra_kwargs)
    for extra_kwargs in call_variants:
      keras_model.predict(features, **extra_kwargs)
def testAssignTowerLocalVarMeanAggregation(self):
  """Cross-tower assign/read of an ON_READ variable with MEAN aggregation."""
  self._skip_eager_if_gpus_less_than(1)

  def model_fn():
    v_sum = variable_scope.variable(
        1.0,
        synchronization=variable_scope.VariableSynchronization.ON_READ,
        aggregation=variable_scope.VariableAggregation.MEAN)
    return v_sum

  dist = mirrored_strategy.MirroredStrategy(
      ["/device:GPU:0", "/device:CPU:0"])

  with dist.scope():
    tower_local_var = dist.call_for_each_tower(model_fn,
                                               run_concurrently=False)
    self.assertIsInstance(tower_local_var, values.TowerLocalVariable)
    self.evaluate(variables.global_variables_initializer())
    # Each tower has a value of 1.0 assigned to it in tower context.
    # When we read the value using `read_var` we should see the MEAN of values
    # on all towers which is the value assigned in tower context.
    self.assertEqual(1.0, self.evaluate(dist.read_var(tower_local_var)))
    tlv_ops = tower_local_var.assign(6.0)
    self.evaluate(tlv_ops)
    # On reading the tower local var we should get the MEAN of all values
    # which is equal to the value assigned.
    self.assertEqual(6.0, self.evaluate(dist.read_var(tower_local_var)))
def testAssignTowerLocalVarSumAggregation(self):
  """Cross-tower assign/read of an ON_READ variable with SUM aggregation."""
  self._skip_eager_if_gpus_less_than(1)

  def model_fn():
    v_sum = variable_scope.variable(
        1.0,
        synchronization=variable_scope.VariableSynchronization.ON_READ,
        aggregation=variable_scope.VariableAggregation.SUM)
    return v_sum

  dist = mirrored_strategy.MirroredStrategy(
      ["/device:GPU:0", "/device:CPU:0"])

  with dist.scope():
    tower_local_var = dist.call_for_each_tower(model_fn,
                                               run_concurrently=False)
    self.assertIsInstance(tower_local_var, values.TowerLocalVariable)
    self.evaluate(variables.global_variables_initializer())
    # Each tower has a value of 1.0 assigned to it in tower context.
    # When we read the value using `read_var` we should see the SUM of each of
    # values on each of the towers.
    self.assertEqual(2.0, self.evaluate(dist.read_var(tower_local_var)))
    # Assigning 6.0 in cross tower context will assign a value of
    # 6.0/num_towers to each tower.
    tlv_ops = tower_local_var.assign(6.0)
    self.evaluate(tlv_ops)
    # On reading the tower local var we should get the assigned value back.
    # The value on all the towers are added before being returned by
    # `read_var`.
    self.assertEqual(6.0, self.evaluate(dist.read_var(tower_local_var)))
def testAssignTowerLocalVarInitializer(self):
  """A tower-local variable is uninitialized until its initializer runs."""
  # This test is not eager compatible since in eager variables are initialized
  # upon construction instead of once the initialization op is run.
  with context.graph_mode():

    def model_fn():
      v_sum = variable_scope.variable(
          1.0,
          synchronization=variable_scope.VariableSynchronization.ON_READ,
          aggregation=variable_scope.VariableAggregation.SUM)
      self.assertIsInstance(v_sum, values.TowerLocalVariable)
      return v_sum

    dist = mirrored_strategy.MirroredStrategy(
        ["/device:GPU:0", "/device:CPU:0"])

    with dist.scope():
      tower_local_var = dist.call_for_each_tower(model_fn)
      self.assertIsInstance(tower_local_var, values.TowerLocalVariable)
      # Before the initializer op runs the variable reports uninitialized.
      self.assertFalse(self.evaluate(tower_local_var.is_initialized()))
      self.evaluate(tower_local_var.initializer)
      self.assertTrue(self.evaluate(tower_local_var.is_initialized()))
def testInputContextPropertyLocal(self):
  """Feeds an input-context-checking input_fn to a local 2-GPU strategy."""
  strategy = mirrored_strategy.MirroredStrategy(num_gpus_per_worker=2)
  # Values the helper-built input_fn is told to expect.
  expectations = {
      "expected_num_replicas_in_sync": 2,
      "expected_num_input_pipelines": 1,
      "expected_input_pipeline_id": 0,
  }
  checking_input_fn = self._input_fn_to_test_input_context(**expectations)
  strategy.make_input_fn_iterator(checking_input_fn)
def test_fit_with_tuple_and_dict_dataset_inputs(self):
  """`fit` accepts dataset features given as a tuple and as a dict."""
  with self.cached_session():
    keras_model = multi_input_output_model()
    sgd = gradient_descent.GradientDescentOptimizer(learning_rate=0.001)
    metric_list = ['mae', keras.metrics.CategoricalAccuracy()]
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:0', '/device:CPU:0'])
    keras_model.compile(sgd, 'mse', metrics=metric_list, distribute=strategy)

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 5))
    output_d_np = np.random.random((10, 7))
    output_e_np = np.random.random((10, 7))

    def fit_on(features):
      """Builds a repeated, batched dataset from `features` and fits on it."""
      data = dataset_ops.Dataset.from_tensor_slices(
          (features, (output_d_np, output_e_np)))
      data = data.repeat(100).batch(10)
      keras_model.fit(data, epochs=1, steps_per_epoch=2, verbose=1)

    # Test with tuples
    fit_on((input_a_np, input_b_np))

    # Test with dict
    fit_on({'input_a': input_a_np, 'input_b': input_b_np})
def testCreatorStacksAreThreadLocal(self):
  """Creator scopes entered inside a replica stay local to that replica."""
  strategy = mirrored_strategy.MirroredStrategy(
      ["/device:CPU:0", "/device:GPU:0"])

  def main_thread_creator(next_creator, *args, **kwargs):
    # We are not using the underlying next_creator for test purposes.
    del next_creator, args, kwargs
    return "main_thread"

  def model_fn():
    suffix = ":thread_" + str(self.evaluate(_replica_id()))

    def thread_creator_fn(next_creator, *args, **kwargs):
      created = next_creator(*args, **kwargs)
      return created + suffix

    with variable_scope.variable_creator_scope(thread_creator_fn):
      # Create a variable in this scope.
      created_var = variable_scope.variable(1.0)

      # This will pause the current thread, and execute the other thread.
      replica_context = distribution_strategy_context.get_replica_context()
      replica_context.merge_call(lambda _: _)
    return created_var

  with context.graph_mode():
    with strategy.scope():
      with variable_scope.variable_creator_scope(main_thread_creator):
        per_replica = strategy.call_for_each_replica(model_fn)
        unwrapped = strategy.unwrap(per_replica)
        self.assertEqual(
            ["main_thread:thread_0", "main_thread:thread_1"], unwrapped)
def test_fit_eval_and_predict_methods_on_dataset(self):
  """fit/evaluate/predict run on a batched dataset under a strategy."""
  # `cached_session` replaces the deprecated `test_session`, matching the
  # other tests in this file.
  with self.cached_session():
    x = keras.layers.Input(shape=(3,), name='input')
    y = keras.layers.Dense(4, name='dense')(x)
    model = keras.Model(x, y)

    optimizer = gradient_descent.GradientDescentOptimizer(0.001)
    loss = 'mse'
    metrics = ['mae']
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:0', '/device:CPU:0'])
    model.compile(optimizer, loss, metrics=metrics, distribute=strategy)

    inputs = np.zeros((10, 3), dtype=np.float32)
    targets = np.zeros((10, 4), dtype=np.float32)
    dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
    dataset = dataset.repeat(100)
    dataset = dataset.batch(10)

    model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
    model.evaluate(dataset, steps=2, verbose=1)
    model.predict(dataset, steps=2)
    # Test with validation data
    model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
              validation_data=dataset, validation_steps=2)
def testWithGetVariableAndVariableScope(self):
  """`get_variable` inside towers honours the enclosing variable scopes.

  Variables created in tower context, before and after a `merge_call`, must
  come back as `MirroredVariable`s whose names carry the "main" (and, where
  applicable, "common") variable-scope prefixes.
  """
  self._skip_eager_if_gpus_less_than(1)

  def model_fn():
    v0 = variable_scope.get_variable("var-thread0", [1])
    with variable_scope.variable_scope("common"):
      v1 = variable_scope.get_variable("var-thread1", [1])
      # This will pause the current thread, and execute the other thread.
      distribute_lib.get_tower_context().merge_call(lambda _: _)
      v2 = variable_scope.get_variable("var-thread2", [1])

    return v0, v1, v2

  devices = ["/device:CPU:0", "/device:GPU:0"]
  dist = mirrored_strategy.MirroredStrategy(devices)
  with dist.scope():
    with variable_scope.variable_scope("main"):
      v = variable_scope.get_variable("var-main0", [1])
      self.assertEqual("main/var-main0:0", v.name)

      result = dist.call_for_each_tower(model_fn, run_concurrently=False)
      self.assertEqual(3, len(result))
      v0, v1, v2 = result
      self.assertIsInstance(v0, values.MirroredVariable)
      self.assertEqual("main/var-thread0:0", v0.name)
      self.assertIsInstance(v1, values.MirroredVariable)
      self.assertEqual("main/common/var-thread1:0", v1.name)
      self.assertIsInstance(v2, values.MirroredVariable)
      self.assertEqual("main/common/var-thread2:0", v2.name)
def test_raise_error_for_stateful_metrics(self):
  """Compiling with a stateful metric and a strategy raises NotImplementedError."""

  class ExampleStatefulMetric(keras.layers.Layer):
    """Minimal layer-based metric that sets `stateful = True` on itself."""

    def __init__(self, name='true_positives', **kwargs):
      super(ExampleStatefulMetric, self).__init__(name=name, **kwargs)
      self.stateful = True

    def __call__(self, y_true, y_pred):
      return y_pred - y_true

  # `cached_session` replaces the deprecated `test_session`, matching the
  # other tests in this file.
  with self.cached_session():
    x = keras.layers.Input(shape=(3,), name='input')
    y = keras.layers.Dense(4, name='dense')(x)
    model = keras.Model(x, y)

    optimizer = gradient_descent.GradientDescentOptimizer(0.001)
    loss = 'mse'
    metrics = ['mae', ExampleStatefulMetric()]
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:1', '/device:GPU:0'])
    with self.assertRaisesRegexp(
        NotImplementedError, 'Stateful metrics are not supported with '
        'DistributionStrategy.'):
      model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
def testAssignMirroredVarTowerContextWithoutAggregationType(self):
  """Tower-context assign without an aggregation method raises ValueError."""
  # A mirrored variable must always have an aggregation type set if it is
  # assigned to in tower mode.
  self._skip_eager_if_gpus_less_than(1)

  def make_variable():
    return variable_scope.variable(1.0, name="foo")

  strategy = mirrored_strategy.MirroredStrategy(
      ["/device:GPU:0", "/device:CPU:0"])
  expected_error = ("You must specify an aggregation method to update a "
                    "MirroredVariable in Tower Context.")

  with strategy.scope():
    mirrored = strategy.call_for_each_tower(
        make_variable, run_concurrently=False)
    self.assertIsInstance(mirrored, values.MirroredVariable)
    self.evaluate(variables.global_variables_initializer())

    def assign_in_tower():
      return mirrored.assign(5.0)

    with self.assertRaisesRegexp(ValueError, expected_error):
      self.evaluate(
          strategy.unwrap(strategy.call_for_each_tower(assign_in_tower)))
def test_batchnorm_correctness(self):
  """BatchNormalization trained under a strategy normalizes its input.

  After training, the predictions (with the learned beta/gamma undone) are
  expected to have mean ~0 and standard deviation ~1, within 0.1.
  """
  # `cached_session` replaces the deprecated `test_session`, matching the
  # other tests in this file.
  with self.cached_session():
    model = keras.models.Sequential()
    norm = keras.layers.BatchNormalization(input_shape=(10,), momentum=0.8)
    model.add(norm)
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:CPU:0', '/device:GPU:0'])
    model.compile(
        loss='mse',
        optimizer=gradient_descent.GradientDescentOptimizer(0.01),
        distribute=strategy)

    # centered on 5.0, standard deviation 10.0 (`scale` is the std dev)
    x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10))
    dataset = dataset_ops.Dataset.from_tensor_slices((x, x))
    dataset = dataset.repeat(100)
    dataset = dataset.batch(32)

    model.fit(dataset, epochs=4, verbose=0, steps_per_epoch=10)
    out = model.predict(dataset, steps=2)
    # Undo the learned shift and scale so only the normalization remains.
    out -= keras.backend.eval(norm.beta)
    out /= keras.backend.eval(norm.gamma)
    np.testing.assert_allclose(out.mean(), 0.0, atol=1e-1)
    np.testing.assert_allclose(out.std(), 1.0, atol=1e-1)
def testAssignSubMirroredVarTowerContext(self):
  """`assign_sub` in tower context with the aggregation method set to "mean".

  The variable starts at 5.0 and each tower subtracts its own tower id; the
  test expects the variable to read 4.5 afterwards.
  """
  self._skip_eager_if_gpus_less_than(1)

  def var_fn():
    return variable_scope.variable(5.0, name="foo")

  dist = mirrored_strategy.MirroredStrategy(
      ["/device:GPU:0", "/device:CPU:0"])

  with dist.scope():
    mirrored_var = dist.call_for_each_tower(var_fn, run_concurrently=False)
    # TODO(anjalisridhar): Use API introduced in cr/201463945 to set the
    # aggregation method.
    mirrored_var._aggregation_method = "mean"
    self.assertIsInstance(mirrored_var, values.MirroredVariable)
    self.evaluate(variables.global_variables_initializer())
    self.assertEqual(5.0, self.evaluate(mirrored_var))

    def model_fn():
      # Subtract this tower's id, cast to the variable's dtype.
      value = math_ops.cast(
          distribute_lib.get_tower_context().tower_id, mirrored_var.dtype)
      return mirrored_var.assign_sub(value)

    self.evaluate(
        dist.unwrap(
            dist.call_for_each_tower(model_fn, run_concurrently=False)))
    self.assertEqual(4.5, self.evaluate(mirrored_var))
def test_train_sequential_with_distribution_strategy(self):
  """An estimator built from a Keras model trains under a mirrored strategy.

  The evaluation loss after training must be lower than before training.
  """
  strategy = mirrored_strategy.MirroredStrategy(
      devices=['/device:GPU:0', '/device:GPU:1'])

  sequential_model = simple_sequential_model()
  accuracy_metric = keras.metrics.CategoricalAccuracy()
  rms_optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.01)
  sequential_model.compile(
      loss='categorical_crossentropy',
      metrics=[accuracy_metric],
      optimizer=rms_optimizer)

  run_config = run_config_lib.RunConfig(
      tf_random_seed=_RANDOM_SEED,
      model_dir=self._base_dir,
      train_distribute=strategy)

  with self.cached_session():
    estimator = keras_lib.model_to_estimator(
        keras_model=sequential_model, config=run_config)
    results_before = estimator.evaluate(
        input_fn=get_ds_test_input_fn, steps=1)
    estimator.train(input_fn=get_ds_train_input_fn, steps=_TRAIN_SIZE / 16)
    results_after = estimator.evaluate(
        input_fn=get_ds_test_input_fn, steps=1)
    self.assertLess(results_after['loss'], results_before['loss'])

  # Clean up the cached writers and the model directory.
  writer_cache.FileWriterCache.clear()
  gfile.DeleteRecursively(self._config.model_dir)
def test_dataset_input_shape_validation(self):
  """`fit` rejects an unbatched dataset and a dataset with a wrong shape."""
  shape_error = 'expected input to have shape'

  with self.cached_session():
    keras_model = get_model()
    rms_optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001)
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:1', '/device:GPU:0'])
    keras_model.compile(rms_optimizer, 'mse', distribute=strategy)

    # User forgets to batch the dataset
    features = np.zeros((10, 3), dtype=np.float32)
    labels = np.zeros((10, 4), dtype=np.float32)
    unbatched = dataset_ops.Dataset.from_tensor_slices(
        (features, labels)).repeat(100)

    with self.assertRaisesRegexp(ValueError, shape_error):
      keras_model.fit(unbatched, epochs=1, steps_per_epoch=2, verbose=0)

    # Wrong input shape
    features = np.zeros((10, 5), dtype=np.float32)
    labels = np.zeros((10, 4), dtype=np.float32)
    wrong_shape = dataset_ops.Dataset.from_tensor_slices(
        (features, labels)).repeat(100).batch(10)

    with self.assertRaisesRegexp(ValueError, shape_error):
      keras_model.fit(wrong_shape, epochs=1, steps_per_epoch=2, verbose=0)
def test_learning_phase_value(self):
  """Checks fit/evaluate/predict results on a model with heavy Dropout."""
  # TODO(anjalisridhar): Modify this test to use Lambdas since we can compare
  # meaningful values. Currently we don't pass the learning phase if the
  # Lambda layer uses the learning phase.
  with self.cached_session():
    net_input = keras.layers.Input(shape=(16,), name='input')
    hidden = keras.layers.Dense(16)(net_input)
    dropped = keras.layers.Dropout(0.9999)(hidden)
    keras_model = keras.Model(net_input, dropped)

    sgd = gradient_descent.GradientDescentOptimizer(0.005)
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:0', '/device:CPU:0'])
    keras_model.compile(sgd, 'mse', metrics=['acc'], distribute=strategy)

    features = np.random.rand(10, 16)
    labels = np.ones((10, 16), dtype=np.float32)
    data = dataset_ops.Dataset.from_tensor_slices((features, labels))
    data = data.repeat(100).batch(8)

    history = keras_model.fit(data, epochs=5, steps_per_epoch=20, verbose=1)
    first_epoch_acc = history.history['acc'][0]
    self.assertEqual(first_epoch_acc, 1)

    eval_results = keras_model.evaluate(data, steps=20)
    self.assertEqual(eval_results[1], 0)

    predictions = keras_model.predict(data, steps=1)
    self.assertNotEqual(np.mean(predictions), 0)
def test_correctness(self):
  """Training with and without a strategy gives matching weights/predictions.

  A single Dense(1) model is fit on y = 3x, first compiled with a
  MirroredStrategy (batch 32) and then recompiled without one (batch 64).
  The first weight tensor and the predictions on [[1], [2], [3], [4]] must
  agree to a relative tolerance of 1e-3.
  """
  with self.cached_session():
    keras.backend.set_image_data_format('channels_last')

    sample_count = 10000
    raw_x = np.random.rand(sample_count, 1)
    y_train = (3 * raw_x).astype('float32')
    x_train = raw_x.astype('float32')
    x_predict = [[1], [2], [3], [4]]

    linear_model = keras.Sequential()
    linear_model.add(keras.layers.Dense(1, input_shape=(1,)))

    # With DistributionStrategy
    train_with = dataset_ops.Dataset.from_tensor_slices(
        (x_train, y_train)).batch(32)
    strategy = mirrored_strategy.MirroredStrategy(
        devices=['/device:CPU:0', '/device:GPU:0'])
    linear_model.compile(
        loss=keras.losses.mean_squared_error,
        optimizer=gradient_descent.GradientDescentOptimizer(0.5),
        distribute=strategy)
    linear_model.fit(x=train_with, epochs=1, steps_per_epoch=310)
    weights_with = linear_model.get_weights()

    predict_with = dataset_ops.Dataset.from_tensor_slices(
        (x_predict, x_predict)).batch(2)
    outputs_with = np.reshape(
        linear_model.predict(predict_with, steps=1), (4, 1))

    # Without DistributionStrategy
    # NOTE(review): the same model object is recompiled and fit again here
    # rather than being rebuilt.
    train_without = dataset_ops.Dataset.from_tensor_slices(
        (x_train, y_train)).batch(64)
    linear_model.compile(
        loss=keras.losses.mean_squared_error,
        optimizer=gradient_descent.GradientDescentOptimizer(0.5))
    linear_model.fit(x=train_without, epochs=1, steps_per_epoch=310)
    weights_without = linear_model.get_weights()

    predict_without = dataset_ops.Dataset.from_tensor_slices(
        (x_predict, x_predict)).batch(4)
    outputs_without = linear_model.predict(predict_without, steps=1)

    # Verify that the weights are the same within some limits of tolerance.
    np.testing.assert_allclose(
        weights_with[0], weights_without[0], rtol=1e-3)
    # Verify that the predicted outputs are the same within some limits of
    # tolerance.
    np.testing.assert_allclose(outputs_with, outputs_without, rtol=1e-3)
def test_calculating_batch_params(self):
  """Checks step calculation (and its errors) for a given batch size."""
  # This verifies that we calculate the number of steps when the batch size
  # is specified.
  batch_params = distributed_training_utils.get_input_batch_params

  with self.cached_session():
    # 64 is the number of input samples.
    samples = np.zeros((64, 3), dtype=np.float32)

    # The number of replicas is equal to 3.
    three_replicas = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:0', '/device:CPU:0', '/device:GPU:1'])

    with self.assertRaisesRegexp(ValueError, 'The number of samples is not '
                                 'divisible by batch size.'):
      # The batch size(128) is larger than the number of input
      # samples(64).
      batch_params(samples, 128, three_replicas)

    with self.assertRaisesRegexp(ValueError, 'is smaller than the number '
                                 'of replicas'):
      # The batch size(32) * num_replicas_in_sync(3) is 96 which is greater
      # than the number of input samples(64).
      batch_params(samples, 32, three_replicas)

    # The number of replicas now is equal to 2.
    two_replicas = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:0', '/device:CPU:0'])

    # Each pair is (batch size per replica, expected steps):
    #  * 32: the number of batches is 64 / 32 = 2, and the number of steps is
    #    the ratio of batches(2) to replicas(2), which is 1.
    #  * 16: the number of batches is 64 / 16 = 4, and the number of steps is
    #    the ratio of batches(4) to replicas(2), which is 2.
    for per_replica_batch, expected_steps in ((32, 1), (16, 2)):
      steps = batch_params(samples, per_replica_batch, two_replicas)
      self.assertEqual(steps, expected_steps)
def test_unsupported_features(self):
  """Unsupported fit/evaluate/predict arguments raise the expected errors."""
  missing_steps_error = 'you should specify the `steps` argument'

  with self.cached_session():
    net_input = keras.layers.Input(shape=(3,), name='input')
    net_output = keras.layers.Dense(4, name='dense')(net_input)
    keras_model = keras.Model(net_input, net_output)

    sgd = gradient_descent.GradientDescentOptimizer(0.001)
    strategy = mirrored_strategy.MirroredStrategy(
        ['/device:GPU:1', '/device:GPU:0'])
    keras_model.compile(sgd, 'mse', metrics=['mae'], distribute=strategy)

    features = np.zeros((10, 3), dtype=np.float32)
    labels = np.zeros((10, 4), dtype=np.float32)
    data = dataset_ops.Dataset.from_tensor_slices((features, labels))
    data = data.repeat(100).batch(10)

    # Test with validation split
    with self.assertRaisesRegexp(
        ValueError, '`validation_split` argument is not '
        'supported when input `x` is a dataset or a '
        'dataset iterator.+'):
      keras_model.fit(data,
                      epochs=1,
                      steps_per_epoch=2,
                      verbose=0,
                      validation_split=0.5,
                      validation_steps=2)

    # Test with sample weight.
    weights = np.random.random((10,))
    with self.assertRaisesRegexp(
        NotImplementedError, '`sample_weight` is currently not supported '
        'when using DistributionStrategy.'):
      keras_model.fit(data,
                      epochs=1,
                      steps_per_epoch=2,
                      verbose=0,
                      sample_weight=weights)

    # Test with not specifying the `steps` argument.
    with self.assertRaisesRegexp(
        ValueError, 'you should specify the `steps_per_epoch` argument'):
      keras_model.fit(data, epochs=1, verbose=0)

    with self.assertRaisesRegexp(ValueError, missing_steps_error):
      keras_model.evaluate(data, verbose=0)

    with self.assertRaisesRegexp(ValueError, missing_steps_error):
      keras_model.predict(data, verbose=0)
def _get_distribution_strategy(self):
  """Returns a two-device MirroredStrategy and prints the chosen devices.

  Uses CPU:0 + GPU:0 by default. When `GPU_TEST` is set, at least one GPU is
  asserted to be present, and GPU:0 + GPU:1 are used if more than one GPU is
  available.
  """
  use_two_gpus = False
  if GPU_TEST:
    self.assertGreater(context.num_gpus(), 0)
    use_two_gpus = context.num_gpus() > 1

  if use_two_gpus:
    chosen_devices = ["/device:GPU:0", "/device:GPU:1"]
  else:
    chosen_devices = ["/device:CPU:0", "/device:GPU:0"]

  # The last dotted component of the test id is the test method name.
  test_name = self.id().rsplit(".", 1)[-1]
  print(test_name, "devices:", ", ".join(chosen_devices))
  return mirrored_strategy.MirroredStrategy(chosen_devices)