def __init__(
    self,
    model: tf.keras.models.Model,
    session: tf.compat.v1.ConfigProto,
    train_config: keras.TFKerasTrainConfig,
    *args: Any,
    **kwargs: Any,
) -> None:
    """Wire a compiled Keras model and its train config into the controller.

    Args:
        model: The compiled ``tf.keras`` model to train.
        session: Annotated as ``ConfigProto``, but the name suggests a
            ``tf.compat.v1.Session`` — NOTE(review): confirm the intended type.
        train_config: Bundles the data/callback configuration; only
            ``callbacks`` and (via ``_init_input_managers``) the data fields
            are consumed here.
        *args: Forwarded unchanged to the superclass initializer.
        **kwargs: Forwarded unchanged to the superclass initializer.
    """
    super().__init__(*args, **kwargs)
    self.model = model
    self.session = session

    # Configure optimizers, done for backwards compatibility.
    self.context._select_optimizers()

    # Validate the aggregation-frequency setting against this model before
    # any training starts (hvd_config comes from the superclass).
    keras._check_if_aggregation_frequency_will_work(
        model=self.model, hvd_config=self.hvd_config
    )

    # Build the training/validation input pipelines from the train config.
    self._train_input_manager, self._validation_input_manager = keras._init_input_managers(
        context=self.context, train_config=train_config
    )

    # If a load path is provided, load weights and restore the data location.
    # Must happen before callbacks are configured so they see restored state.
    self._load()

    self._configure_callbacks(train_config.callbacks)

    # Per-workload bookkeeping, reset as training workloads are processed.
    self.train_response_func = None  # type: Optional[workload.ResponseFunc]
    self.train_workload_metrics = []  # type: List[Dict[str, Any]]
    self.train_workload_batches = 0
    self.train_workload_inputs = 0
    self.train_workload_len = 0
    self.test_inputs = 0
def __init__(
    self,
    model: tf.keras.models.Model,
    session: tf.compat.v1.ConfigProto,
    train_config: keras.TFKerasTrainConfig,
    *args: Any,
    **kwargs: Any,
) -> None:
    """Wire a compiled Keras model and its train config into the controller.

    Unwraps deprecated ``SequenceAdapter`` data wrappers, validates both data
    sets, and prepares callback/enqueuer state before training begins.

    Args:
        model: The compiled ``tf.keras`` model to train.
        session: Annotated as ``ConfigProto``, but the name suggests a
            ``tf.compat.v1.Session`` — NOTE(review): confirm the intended type.
        train_config: Supplies ``training_data``, ``validation_data``, and
            ``callbacks``.
        *args: Forwarded unchanged to the superclass initializer.
        **kwargs: Forwarded unchanged to the superclass initializer.
    """
    super().__init__(*args, **kwargs)
    self.model = model
    self.session = session

    # Configure optimizers, done for backwards compatibility.
    self.context._select_optimizers()

    # Validate the aggregation-frequency setting against this model before
    # any training starts (hvd_config comes from the superclass).
    keras._check_if_aggregation_frequency_will_work(
        model=self.model, hvd_config=self.hvd_config
    )

    self.training_data = train_config.training_data
    self.validation_data = train_config.validation_data

    # Support the deprecated SequenceAdapter API.
    if isinstance(self.training_data, keras.SequenceAdapter):
        # Forward the adapter's loader settings to the fit configuration,
        # then keep only the underlying Sequence.
        self.context._configure_fit(
            workers=self.training_data.workers,
            use_multiprocessing=self.training_data.use_multiprocessing,
            max_queue_size=self.training_data.max_queue_size,
        )
        # Use the provided Sequence directly.
        self.training_data = self.training_data.sequence
    if isinstance(self.validation_data, keras.SequenceAdapter):
        # Ignore these settings and use the same settings as for the fit call.
        self.validation_data = self.validation_data.sequence

    # Validate the (possibly unwrapped) data sets before use.
    self._check_training_data()
    self._check_validation_data()

    # Enqueuers are created later; track them here so they can be shut down.
    self.enqueuers = []  # type: List[keras._Enqueuer]

    # If a load path is provided, load weights and restore the data location.
    # Must happen before callbacks are configured so they see restored state.
    self._load()

    self._configure_callbacks(train_config.callbacks)

    # Per-workload bookkeeping, reset as training workloads are processed.
    self.train_response_func = None  # type: Optional[workload.ResponseFunc]
    self.train_workload_metrics = []  # type: List[Dict[str, Any]]
    self.train_workload_batches = 0
    self.train_workload_inputs = 0
    self.train_workload_len = 0
    self.test_inputs = 0
def __init__(
    self,
    model: tf.keras.models.Model,
    session: tf.compat.v1.ConfigProto,
    train_config: keras.TFKerasTrainConfig,
    trial: "TFKerasTrial",
    *args: Any,
    **kwargs: Any,
) -> None:
    """Wire a compiled Keras model, its train config, and the owning trial
    into the controller.

    Unwraps deprecated ``SequenceAdapter`` wrappers, enforces the horovod
    requirement for distributed runs, builds compatibility workloads when
    none were supplied, and restores from the latest checkpoint if present.

    Args:
        model: The compiled ``tf.keras`` model to train.
        session: Annotated as ``ConfigProto``, but the name suggests a
            ``tf.compat.v1.Session`` — NOTE(review): confirm the intended type.
        train_config: Supplies ``training_data``, ``validation_data``, and
            ``callbacks``.
        trial: The user's trial instance that produced ``model``.
        *args: Forwarded unchanged to the superclass initializer.
        **kwargs: Forwarded unchanged to the superclass initializer.
    """
    super().__init__(*args, **kwargs)
    self.model = model
    self.session = session
    self.trial = trial

    # Configure optimizers, done for backwards compatibility.
    self.context._select_optimizers()

    # Validate the aggregation-frequency setting against this model before
    # any training starts.
    keras._check_if_aggregation_frequency_will_work(
        model=self.model,
        use_horovod=self.use_horovod,
        aggregation_frequency=self.context._aggregation_frequency,
    )

    self.training_data = train_config.training_data
    self.validation_data = train_config.validation_data

    # Support the deprecated SequenceAdapter API.
    if isinstance(self.training_data, keras.SequenceAdapter):
        # Forward the adapter's loader settings to the fit configuration,
        # then keep only the underlying Sequence.
        self.context._configure_fit(
            workers=self.training_data.workers,
            use_multiprocessing=self.training_data.use_multiprocessing,
            max_queue_size=self.training_data.max_queue_size,
        )
        # Use the provided Sequence directly.
        self.training_data = self.training_data.sequence
    if isinstance(self.validation_data, keras.SequenceAdapter):
        # Ignore these settings and use the same settings as for the fit call.
        self.validation_data = self.validation_data.sequence

    # Distributed training is only supported through horovod here.
    if self.context.distributed.size > 1:
        assert self.use_horovod, (
            "TF Keras trial must be launched with a horovod backend if "
            "doing distributed training"
        )

    # Validate the (possibly unwrapped) data sets before use.
    self._check_training_data()
    self._check_validation_data()

    # Enqueuers are created later; track them here so they can be shut down.
    self.enqueuers = []  # type: List[keras._Enqueuer]

    # When no workload stream was supplied, synthesize one (plus a sequencer)
    # for backward compatibility with the searcher-driven training loop.
    self.wlsq = None  # type: Optional[layers.WorkloadSequencer]
    if self.workloads is None:
        self.workloads, self.wlsq = layers.make_compatibility_workloads(
            self.context._core,
            self.env,
            self.context.get_global_batch_size(),
        )

    # If a load path is provided, load weights and restore the data location.
    # Checkpoint restore must happen before callbacks are configured so they
    # see restored state.
    self.multiplexer_load_state = None  # type: Optional[Dict]
    if self.env.latest_checkpoint is not None:
        logging.info(f"Restoring trial from checkpoint {self.env.latest_checkpoint}")
        with self.context._core.checkpoint.restore_path(
            self.env.latest_checkpoint
        ) as load_path:
            self._load(load_path)

    self._configure_callbacks(train_config.callbacks)

    # Per-workload bookkeeping, reset as training workloads are processed.
    self.train_response_func = None  # type: Optional[workload.ResponseFunc]
    self.train_workload_metrics = []  # type: List[Dict[str, Any]]
    self.train_workload_batches = 0
    self.train_workload_inputs = 0
    self.train_workload_len = 0
    self.test_inputs = 0

    # Resume the step counter from the environment after any restore.
    self.steps_completed = self.env.steps_completed