def _model_compile(self,
                   strategy,
                   steps_per_execution=1,
                   run_eagerly=False,
                   with_normalization_layer=False):
  """Builds and compiles a small test model under `strategy.scope()`.

  Args:
    strategy: A `tf.distribute` strategy instance, or the string
      "ParameterServerStrategy", in which case a
      `ParameterServerStrategyV2` with a 3-worker/2-ps cluster and a
      2-shard variable partitioner is constructed here.
    steps_per_execution: Forwarded to `model.compile`.
    run_eagerly: Forwarded to `model.compile`.
    with_normalization_layer: If True, appends a `BatchNormalization`
      layer to the model.

  Returns:
    A tuple of (compiled model, list containing one
    `ResultAssertingCallback`). The callback raises `RuntimeError` from
    its hooks if epochs are not strictly increasing, if "loss" is absent
    or non-float in the logs, if the loss at epoch 0 or 9 does not
    decrease relative to the tracked reference, or if training does not
    end at epoch 9 (i.e. training is expected to run exactly 10 epochs).
  """

  class ResultAssertingCallback(callbacks_lib.Callback):
    """Asserts monotone epoch numbering and a decreasing loss trend."""

    def __init__(self):
      # Last epoch index seen; -1 so that epoch 0 passes the ordering check.
      self._prev_epoch = -1
      self._loss_to_compare_against = 2  # Empirical initial value

    def on_epoch_end(self, epoch, logs=None):
      logging.info("testModelFit: epoch=%r, logs=%r", epoch, logs)
      if epoch <= self._prev_epoch:
        raise RuntimeError("Epoch is supposed to be larger than previous.")
      self._prev_epoch = epoch
      # "loss" must be present and a Python/NumPy float.
      is_loss_float = (
          logs.get("loss", None) is not None and
          isinstance(logs["loss"], (float, np.floating)))
      if not is_loss_float:
        raise RuntimeError("loss is supposed to be in the logs and float.")
      if epoch == 0 or epoch == 9:
        # Making sure the loss of first epoch is below 1, and that of last
        # epoch is smaller than the first epoch.
        if logs["loss"] > self._loss_to_compare_against:
          raise RuntimeError(
              "loss at epoch {} is larger than previous.".format(epoch))
        self._loss_to_compare_against = logs["loss"]

    def on_train_end(self, logs=None):
      # Training is expected to have run for exactly 10 epochs (0..9).
      if self._prev_epoch != 9:
        raise RuntimeError("Unexpected last epoch: {}".format(
            self._prev_epoch))

  # TODO(b/182193218): Use ParameterServerStrategy as a proper strategy
  # combination.
  if strategy == "ParameterServerStrategy":
    gpu_devices = config.list_physical_devices("GPU")
    if len(gpu_devices) > 1:
      # Skip rather than fail on multi-GPU hosts.
      self.skipTest("b/178452835: Multi-GPUs not supported in "
                    "ParameterServerStrategy.")
    strategy = parameter_server_strategy_v2.ParameterServerStrategyV2(
        multi_worker_testing_utils.make_parameter_server_cluster(3, 2),
        variable_partitioner=sharded_variable.FixedShardsPartitioner(2))
  with strategy.scope():
    # Minimal model: a single Dense layer, optionally followed by
    # BatchNormalization.
    model = sequential.Sequential([core_layers.Dense(10)])
    if with_normalization_layer:
      norm = keras.layers.BatchNormalization(
          axis=-1, input_shape=(4, 4, 3), momentum=0.8)
      model.add(norm)
  model.compile(
      gradient_descent.SGD(),
      loss="mse",
      steps_per_execution=steps_per_execution,
      run_eagerly=run_eagerly)
  return model, [ResultAssertingCallback()]
def setUpClass(cls):
  """Creates a shared ParameterServerStrategyV2 for all tests in the class.

  The strategy is backed by a 3-worker / 2-parameter-server in-process
  cluster, with variables split across 2 fixed shards.
  """
  super().setUpClass()
  cluster_resolver = multi_worker_testing_utils.make_parameter_server_cluster(
      3, 2)
  partitioner = sharded_variable.FixedShardsPartitioner(2)
  cls.strategy = parameter_server_strategy_v2.ParameterServerStrategyV2(
      cluster_resolver, variable_partitioner=partitioner)
def make_coordinator(num_workers, num_ps, variable_partitioner=None):
  """Builds a ClusterCoordinator over a fresh parameter-server cluster.

  Args:
    num_workers: Number of worker tasks in the test cluster.
    num_ps: Number of parameter-server tasks in the test cluster.
    variable_partitioner: Optional partitioner passed to the strategy.

  Returns:
    A `coordinator_lib.ClusterCoordinator` wrapping a
    `ParameterServerStrategyV2` on the newly created cluster.
  """
  cluster_resolver = multi_worker_testing_utils.make_parameter_server_cluster(
      num_workers, num_ps)
  strategy = parameter_server_strategy_v2.ParameterServerStrategyV2(
      cluster_resolver, variable_partitioner=variable_partitioner)
  return coordinator_lib.ClusterCoordinator(strategy)
def setUpClass(cls):
  """Creates a shared ClusterCoordinator for all tests in the class.

  Uses a 3-worker / 2-parameter-server in-process cluster with no
  variable partitioner.
  """
  super(KPLCreatedInDatasetsFromFunctionTest, cls).setUpClass()
  cluster_resolver = multi_worker_testing_utils.make_parameter_server_cluster(
      3, 2)
  strategy = parameter_server_strategy_v2.ParameterServerStrategyV2(
      cluster_resolver)
  cls.coordinator = coordinator_lib.ClusterCoordinator(strategy)