def advance(self, env: EnvManager) -> int:
    """Step the environment once and feed the results to every trainer.

    Times the env step, routes each resulting EnvironmentStep's experiences
    to the matching trainer, then gives each trainer a chance to update its
    policy and push the new policy back into the environment.

    :param env: The EnvManager to step.
    :return: The number of EnvironmentStep infos produced by this step.
    """
    with hierarchical_timer("env_step"):
        time_start_step = time()
        new_step_infos = env.step()
        delta_time_step = time() - time_start_step
    for step_info in new_step_infos:
        for brain_name, trainer in self.trainers.items():
            # Attribute the env-step wall time to this brain's metrics.
            if brain_name in self.trainer_metrics:
                self.trainer_metrics[brain_name].add_delta_step(delta_time_step)
            trainer.add_experiences(
                step_info.previous_all_brain_info,
                step_info.current_all_brain_info,
                step_info.brain_name_to_action_info[brain_name].outputs,
            )
            trainer.process_experiences(
                step_info.previous_all_brain_info, step_info.current_all_brain_info
            )
    for brain_name, trainer in self.trainers.items():
        # NOTE(review): delta_time_step is recorded again here, in addition to
        # once per step_info in the loop above — confirm the double count is
        # intentional before changing it.
        if brain_name in self.trainer_metrics:
            self.trainer_metrics[brain_name].add_delta_step(delta_time_step)
        if self.train_model and trainer.get_step <= trainer.get_max_steps:
            trainer.increment_step(len(new_step_infos))
            if trainer.is_ready_update():
                # Perform gradient descent with experience buffer
                with hierarchical_timer("update_policy"):
                    trainer.update_policy()
                # Publish the freshly updated policy to the environment.
                env.set_policy(brain_name, trainer.policy)
    return len(new_step_infos)
def start_trainer(self, trainer: Trainer, env_manager: EnvManager) -> None:
    """Register *trainer* under its brain name and publish its policy.

    Also logs the trainer and, when training, writes its hyperparameters
    to TensorBoard.
    """
    brain = trainer.brain_name
    self.trainers[brain] = trainer
    self.logger.info(trainer)
    if self.train_model:
        trainer.write_tensorboard_text("Hyperparameters", trainer.parameters)
    env_manager.set_policy(brain, trainer.policy)
def start_learning(
    self, env_manager: EnvManager, trainer_config: Dict[str, Any]
) -> None:
    """Initialize trainers from *trainer_config* and run the training loop.

    Sets curriculum lessons, creates trainers for the environment's external
    brains, then repeatedly advances the environment until training is done,
    saving the model periodically and once more at the end.

    :param env_manager: The EnvManager to train against.
    :param trainer_config: Per-brain trainer configuration dictionary.
    """
    # TODO: Should be able to start learning at different lesson numbers
    # for each curriculum.
    if self.meta_curriculum is not None:
        self.meta_curriculum.set_all_curriculums_to_lesson_num(self.lesson)
    self._create_model_path(self.model_path)
    tf.reset_default_graph()
    # NOTE(review): this comment predates the surrounding code — nothing here
    # visibly limits GPU memory; confirm where session config is applied.
    # Prevent a single session from taking all GPU memory.
    self.initialize_trainers(trainer_config, env_manager.external_brains)
    for t in self.trainers.values():
        self.logger.info(t)
    global_step = 0
    if self.train_model:
        for trainer in self.trainers.values():
            trainer.write_tensorboard_text("Hyperparameters", trainer.parameters)
    try:
        for brain_name, trainer in self.trainers.items():
            env_manager.set_policy(brain_name, trainer.policy)
        self._reset_env(env_manager)
        while self._not_done_training():
            n_steps = self.advance(env_manager)
            for _ in range(n_steps):
                global_step += 1
                self.reset_env_if_ready(env_manager, global_step)
                if self._should_save_model(global_step):
                    # Save Tensorflow model
                    self._save_model()
                self.write_to_tensorboard(global_step)
        # Final save Tensorflow model
        if global_step != 0 and self.train_model:
            self._save_model()
    except (KeyboardInterrupt, UnityCommunicationException):
        # Best-effort save on interrupt / lost Unity connection, then fall
        # through to normal shutdown below (exception deliberately swallowed).
        if self.train_model:
            self._save_model_when_interrupted()
    if self.train_model:
        self._write_training_metrics()
        self._export_graph()
    self._write_timing_tree()
    env_manager.close()
def _reset_env(self, env: EnvManager) -> List[EnvironmentStep]:
    """Reset the environment with freshly sampled reset parameters.

    Samples reset parameters from the sampler manager, overlays the meta
    curriculum's config when one is present, and resets the environment.

    :return: The initial batch of EnvironmentStep data after the reset.
    """
    reset_config = self.sampler_manager.sample_all()
    if self.meta_curriculum:
        # Curriculum values take precedence over sampled ones on key clashes.
        reset_config.update(self.meta_curriculum.get_config())
    return env.reset(config=reset_config)
def start_learning(self, env_manager: EnvManager) -> None:
    """Run the main training loop with the already-initialized trainers.

    Publishes each trainer's policy, resets the environment, then advances
    it until training completes, saving the model periodically and once
    more at the end.

    :param env_manager: The EnvManager to train against.
    """
    self._create_model_path(self.model_path)
    tf.reset_default_graph()
    for t in self.trainers.values():
        self.logger.info(t)
    global_step = 0
    if self.train_model:
        for trainer in self.trainers.values():
            trainer.write_tensorboard_text("Hyperparameters", trainer.parameters)
    try:
        for brain_name, trainer in self.trainers.items():
            env_manager.set_policy(brain_name, trainer.policy)
        self._reset_env(env_manager)
        while self._not_done_training():
            n_steps = self.advance(env_manager)
            for _ in range(n_steps):
                global_step += 1
                self.reset_env_if_ready(env_manager, global_step)
                if self._should_save_model(global_step):
                    # Save Tensorflow model
                    self._save_model()
                self.write_to_tensorboard(global_step)
        # Final save Tensorflow model
        if global_step != 0 and self.train_model:
            self._save_model()
    except (KeyboardInterrupt, UnityCommunicationException):
        # Best-effort save on interrupt / lost Unity connection, then fall
        # through to normal shutdown below (exception deliberately swallowed).
        if self.train_model:
            self._save_model_when_interrupted()
    if self.train_model:
        self._write_training_metrics()
        self._export_graph()
    self._write_timing_tree()
    env_manager.close()
def start_learning(self, env_manager: EnvManager) -> None:
    """Run the training loop, creating trainers lazily as brains appear.

    On every iteration, any external brain not seen before gets a trainer
    generated by the trainer factory and started; the environment is then
    advanced until training completes, saving the model periodically and
    once more at the end.

    :param env_manager: The EnvManager to train against.
    """
    self._create_model_path(self.model_path)
    tf.reset_default_graph()
    global_step = 0
    last_brain_names: Set[str] = set()
    try:
        self._reset_env(env_manager)
        while self._not_done_training():
            external_brains = set(env_manager.external_brains.keys())
            new_brains = external_brains - last_brain_names
            if external_brains != last_brain_names:
                # Spin up a trainer for each newly appeared brain. Removed
                # brains keep their trainers; we only track set membership.
                for name in new_brains:
                    trainer = self.trainer_factory.generate(
                        env_manager.external_brains[name]
                    )
                    self.start_trainer(trainer, env_manager)
                last_brain_names = external_brains
            n_steps = self.advance(env_manager)
            for _ in range(n_steps):
                global_step += 1
                self.reset_env_if_ready(env_manager, global_step)
                if self._should_save_model(global_step):
                    # Save Tensorflow model
                    self._save_model()
                self.write_to_tensorboard(global_step)
        # Final save Tensorflow model
        if global_step != 0 and self.train_model:
            self._save_model()
    except (KeyboardInterrupt, UnityCommunicationException):
        # Best-effort save on interrupt / lost Unity connection, then fall
        # through to normal shutdown below (exception deliberately swallowed).
        if self.train_model:
            self._save_model_when_interrupted()
    if self.train_model:
        self._write_training_metrics()
        self._export_graph()
    self._write_timing_tree()
    env_manager.close()