def test_adapt_sequence() -> None:
    """Sequences adapt to SequenceAdapter, and re-adapting is a no-op."""
    train_seq, test_seq = utils.make_xor_data_sequences()
    adapted_train = keras._adapt_keras_data(train_seq, batch_size=1)
    assert isinstance(adapted_train, keras.SequenceAdapter)
    adapted_test = keras._adapt_keras_data(test_seq, batch_size=1)
    assert isinstance(adapted_test, keras.SequenceAdapter)
    # The adapter must wrap the exact Sequence objects it was given.
    assert adapted_train._sequence._sequence is train_seq
    assert adapted_test._sequence._sequence is test_seq
    # Feeding an already-adapted object back in returns it unchanged.
    assert keras._adapt_keras_data(adapted_train, batch_size=1) is adapted_train
    assert keras._adapt_keras_data(adapted_test, batch_size=1) is adapted_test
def test_adapt_list_of_np_arrays_as_y() -> None:
    """A list of target arrays (multi-output model) batches in lockstep with x."""
    targets = [np.arange(100, 200), np.arange(200, 300)]
    adapted = keras._adapt_keras_data(
        x=np.arange(0, 100),
        y=targets,
        batch_size=16,
        drop_leftovers=False,
    )
    assert isinstance(adapted, keras.SequenceAdapter)
    # 100 samples at batch_size 16 -> 7 batches when leftovers are kept.
    assert len(adapted) == 7
    xs, ys = adapted._sequence[3]
    # Batch 3 covers sample indices [48, 64) across x and every y output.
    assert np.array_equal(xs, np.arange(48, 64))
    assert np.array_equal(ys[0], np.arange(148, 164))
    assert np.array_equal(ys[1], np.arange(248, 264))
def test_adapt_dict_of_np_arrays_as_x() -> None:
    """A dict of input arrays (named model inputs) batches in lockstep with y."""
    inputs = {
        "k1": np.arange(0, 100),
        "k2": np.arange(100, 200),
    }
    adapted = keras._adapt_keras_data(
        x=inputs,
        y=np.arange(200, 300),
        batch_size=16,
        drop_leftovers=False,
    )
    assert isinstance(adapted, keras.SequenceAdapter)
    # 100 samples at batch_size 16 -> 7 batches when leftovers are kept.
    assert len(adapted) == 7
    xs, ys = adapted._sequence[3]
    # Batch 3 covers sample indices [48, 64) in every named input and in y.
    assert np.array_equal(xs["k1"], np.arange(48, 64))
    assert np.array_equal(xs["k2"], np.arange(148, 164))
    assert np.array_equal(ys, np.arange(248, 264))
def from_trial(
    trial_inst: det.Trial,
    context: det.TrialContext,
    env: det.EnvContext,
    workloads: workload.Stream,
    load_path: Optional[pathlib.Path],
    rendezvous_info: det.RendezvousInfo,
    hvd_config: horovod.HorovodContext,
) -> det.TrialController:
    """Build a TFKerasTrialController from a user-defined TFKerasTrial.

    Narrows the generic trial/context to their tf.keras-specific types,
    adapts the user's training and validation data, builds and compiles
    the model, and returns a fully wired controller.
    """
    # Fail fast with clear messages if the caller passed the wrong
    # context or trial subtype for this controller.
    check.is_instance(
        context, keras.TFKerasTrialContext,
        "TFKerasTrialController needs a TFKerasTrialContext")
    context = cast(keras.TFKerasTrialContext, context)
    check.is_instance(trial_inst, TFKerasTrial,
                      "TFKerasTrialController needs a TFKerasTrial")
    trial = cast(TFKerasTrial, trial_inst)
    session = TFKerasTrialController._configure_session(
        env, hvd_config, trial.session_config())
    # Split the user's data loader result into (x, y, sample_weight) and
    # wrap it for per-slot batching. Training drops the final partial
    # batch; validation keeps it so every sample is evaluated.
    training_x, training_y, training_sample_weight = keras._get_x_y_and_sample_weight(
        input_data=trial.build_training_data_loader())
    training_data = keras._adapt_keras_data(
        x=training_x,
        y=training_y,
        sample_weight=training_sample_weight,
        batch_size=context.get_per_slot_batch_size(),
        drop_leftovers=True,
    )
    val_x, val_y, val_sample_weight = keras._get_x_y_and_sample_weight(
        input_data=trial.build_validation_data_loader())
    validation_data = keras._adapt_keras_data(
        x=val_x,
        y=val_y,
        sample_weight=val_sample_weight,
        batch_size=context.get_per_slot_batch_size(),
        drop_leftovers=False,
    )
    # build_model() is expected to have called wrap_model(...) and
    # model.compile(...) on the context; verify both before proceeding.
    trial.build_model()
    check.is_not_none(context.model, "Please call wrap_model(...).")
    check.is_not_none(context.compile_args, "Please call model.compile(...).")
    compile_args = cast(inspect.BoundArguments, context.compile_args)
    TFKerasTrialController.compile_model(
        context=context, compile_args=compile_args, env=env, hvd_config=hvd_config)
    tf_keras_callbacks = trial.keras_callbacks()
    return TFKerasTrialController(
        context.model,
        session,
        keras.TFKerasTrainConfig(training_data, validation_data, tf_keras_callbacks),
        context,
        env,
        workloads,
        load_path,
        rendezvous_info,
        hvd_config,
    )
def fit(wrapper, *args: Any, **kwargs: Any) -> None:
    """Communicate a model, data, and other training configuration with the harness.

    Parameters: the same as tf.keras.Model.fit except for this function only
    handles the following cases of data

    x: Input data. It could be:
        1) A Numpy array (or array-like), or a list of arrays (in case the
           model has multiple inputs).
        2) A dict mapping input names to the corresponding array, if the
           model has named inputs.
        3) A tf.data dataset. Should return a tuple of either (inputs,
           targets) or (inputs, targets, sample_weights).
        4) A keras.utils.Sequence returning (inputs, targets) or (inputs,
           targets, sample weights).

    y: Target data. Like the input data x, it could be either Numpy
        array(s). If x is a dataset or keras.utils.Sequence instance, y
        should not be specified (since targets will be obtained from x).

    validation_data: Data on which to evaluate the loss and any model
        metrics at the end of each epoch. The model will not be trained on
        this data. validation_data will override validation_split.
        validation_data could be:
        1) tuple (x_val, y_val) of Numpy arrays
        2) tuple (x_val, y_val, val_sample_weights) of Numpy arrays
        3) dataset
        For the first two cases, batch_size must be provided. For the last
        case, validation_steps could be provided.
    """
    # NOTE(review): this body reads `self`, `model`, and `train_fn`, which
    # are not parameters — presumably this is defined inside a method and
    # they are closure variables; confirm against the enclosing scope.
    if not self.compile_args:
        raise errors.InvalidExperimentException(
            "Must call .compile before calling .fit().")
    # Bind the caller's args against the real tf.keras.Model.fit signature
    # so every fit() option is reachable by name below.
    fit_args = inspect.signature(model.fit).bind(*args, **kwargs)
    fit_args.apply_defaults()
    # TODO: Use batch size from context instead of fit call.
    # Training drops the final partial batch; validation keeps it.
    training_data = keras._adapt_keras_data(
        x=fit_args.arguments["x"],
        y=fit_args.arguments["y"],
        sample_weight=fit_args.arguments["sample_weight"],
        batch_size=self.env.per_slot_batch_size,
        use_multiprocessing=fit_args.arguments["use_multiprocessing"],
        workers=fit_args.arguments["workers"],
        max_queue_size=fit_args.arguments["max_queue_size"],
        drop_leftovers=True,
    )
    val_x, val_y, val_sample_weight = keras._get_x_y_and_sample_weight(
        input_data=fit_args.arguments["validation_data"])
    validation_data = keras._adapt_keras_data(
        x=val_x,
        y=val_y,
        sample_weight=val_sample_weight,
        batch_size=self.env.per_slot_batch_size,
        use_multiprocessing=fit_args.arguments["use_multiprocessing"],
        workers=fit_args.arguments["workers"],
        max_queue_size=fit_args.arguments["max_queue_size"],
        drop_leftovers=False,
    )
    # Hand the adapted data and callbacks to the harness; the actual
    # training loop is driven by train_fn when one was provided.
    self.train_config = TFKerasTrainConfig(
        training_data=training_data,
        validation_data=validation_data,
        callbacks=fit_args.arguments["callbacks"],
    )
    if train_fn:
        train_fn()
def from_trial(
    trial_inst: det.Trial,
    context: det.TrialContext,
    env: det.EnvContext,
    workloads: workload.Stream,
    load_path: Optional[pathlib.Path],
    rendezvous_info: det.RendezvousInfo,
    hvd_config: horovod.HorovodContext,
) -> det.TrialController:
    """Build a TFKerasTrialController from a user-defined TFKerasTrial.

    Narrows the generic trial/context to their tf.keras-specific types,
    adapts the user's data, wraps the model/optimizer for multi-GPU or
    Horovod use, compiles the model, and returns a wired controller.
    """
    # Fail fast with clear messages if the caller passed the wrong
    # context or trial subtype for this controller.
    check.is_instance(
        context,
        keras.TFKerasTrialContext,
        "TFKerasTrialController needs a TFKerasTrialContext",
    )
    context = cast(keras.TFKerasTrialContext, context)
    check.is_instance(trial_inst, TFKerasTrial,
                      "TFKerasTrialController needs a TFKerasTrial")
    trial = cast(TFKerasTrial, trial_inst)
    session = TFKerasTrialController._configure_session(
        env, hvd_config, trial.session_config())
    # Split the user's data loader result into (x, y, sample_weight) and
    # wrap it for per-slot batching. Training drops the final partial
    # batch; validation keeps it so every sample is evaluated.
    training_x, training_y, training_sample_weight = keras._get_x_y_and_sample_weight(
        input_data=trial.build_training_data_loader())
    training_data = keras._adapt_keras_data(
        x=training_x,
        y=training_y,
        sample_weight=training_sample_weight,
        batch_size=context.get_per_slot_batch_size(),
        drop_leftovers=True,
    )
    val_x, val_y, val_sample_weight = keras._get_x_y_and_sample_weight(
        input_data=trial.build_validation_data_loader())
    validation_data = keras._adapt_keras_data(
        x=val_x,
        y=val_y,
        sample_weight=val_sample_weight,
        batch_size=context.get_per_slot_batch_size(),
        drop_leftovers=False,
    )
    # build_model() is expected to have called wrap_model(...) and
    # model.compile(...) on the context; verify both before proceeding.
    trial.build_model()
    check.is_not_none(context.model, "Please call wrap_model(...).")
    check.is_not_none(context.compile_args, "Please call model.compile(...).")
    compile_args = cast(inspect.BoundArguments, context.compile_args)
    # Swap in the distributed (multi-GPU / Horovod) model and optimizer,
    # optionally instrumented with the Determined profiler.
    (
        context.model,
        compile_args.arguments["optimizer"],
    ) = keras._get_multi_gpu_model_and_optimizer(
        pre_compiled_model=context.model,
        optimizer=compile_args.arguments["optimizer"],
        env=env,
        hvd_config=hvd_config,
        profile_frequency=env.experiment_config.profile_frequency(),
        profile_filename=DeterminedProfiler.OUTPUT_FILENAME,
    )
    if hvd_config.use and version.parse(
            tf.__version__) >= version.parse("2.0.0"):
        # On TF >= 2.0 with Horovod, eager tf.function execution must be
        # disabled so the optimizer's gradient hook is actually invoked.
        logging.info(
            "Calling `model.compile(...)` with `experimental_run_tf_function=False` to ensure "
            "TensorFlow calls `optimizer.get_gradients()` to compute gradients."
        )
        context.model.compile(*compile_args.args, **compile_args.kwargs,
                              experimental_run_tf_function=False)
    else:
        context.model.compile(*compile_args.args, **compile_args.kwargs)
    tf_keras_callbacks = trial.keras_callbacks()
    return TFKerasTrialController(
        context.model,
        session,
        keras.TFKerasTrainConfig(training_data, validation_data, tf_keras_callbacks),
        context,
        env,
        workloads,
        load_path,
        rendezvous_info,
        hvd_config,
    )
def test_adapt_invalid_data_type() -> None:
    """A tuple containing None is rejected with InvalidDataTypeException."""
    _, test_seq = utils.make_xor_data_sequences()
    adapted = keras._adapt_keras_data(test_seq, batch_size=1)
    with pytest.raises(det.errors.InvalidDataTypeException) as err:
        keras._adapt_keras_data((None, adapted), batch_size=1)
    assert err is not None
def test_adapt_empty_sequence() -> None:
    """Adapting a zero-length Sequence must raise ValueError."""
    with pytest.raises(ValueError):
        keras._adapt_keras_data(Empty(), batch_size=1)