    def advance(self, env: EnvManager) -> int:
        with hierarchical_timer("env_step"):
            time_start_step = time()
            new_step_infos = env.step()
            delta_time_step = time() - time_start_step

        for step_info in new_step_infos:
            for brain_name, trainer in self.trainers.items():
                if brain_name in self.trainer_metrics:
                    self.trainer_metrics[brain_name].add_delta_step(
                        delta_time_step)
                trainer.add_experiences(
                    step_info.previous_all_brain_info,
                    step_info.current_all_brain_info,
                    step_info.brain_name_to_action_info[brain_name].outputs,
                )
                trainer.process_experiences(step_info.previous_all_brain_info,
                                            step_info.current_all_brain_info)
        for brain_name, trainer in self.trainers.items():
            if brain_name in self.trainer_metrics:
                self.trainer_metrics[brain_name].add_delta_step(
                    delta_time_step)
            if self.train_model and trainer.get_step <= trainer.get_max_steps:
                trainer.increment_step(len(new_step_infos))
                if trainer.is_ready_update():
                    # Perform gradient descent with experience buffer
                    with hierarchical_timer("update_policy"):
                        trainer.update_policy()
                    env.set_policy(brain_name, trainer.policy)
        return len(new_step_infos)

    def start_trainer(self, trainer: Trainer, env_manager: EnvManager) -> None:
        self.trainers[trainer.brain_name] = trainer
        self.logger.info(trainer)
        if self.train_model:
            trainer.write_tensorboard_text("Hyperparameters",
                                           trainer.parameters)
        env_manager.set_policy(trainer.brain_name, trainer.policy)
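Together these two methods form the skeleton of the training driver: start_trainer() registers a trainer under its brain name and publishes its initial policy to the environment, and advance() pumps one batch of environment steps through every registered trainer, returning how many steps were collected. A minimal sketch of the calling pattern follows; the controller, trainer, and env_manager objects are assumed to exist already, and the snippets on this page also assume the usual imports (from time import time, tensorflow as tf, and the ml-agents types).

# Hypothetical driver loop; the real wiring lives in start_learning() below.
controller.start_trainer(trainer, env_manager)

global_step = 0
while controller._not_done_training():
    # advance() returns the number of EnvironmentSteps just collected,
    # which keeps global_step in sync with the simulation.
    global_step += controller.advance(env_manager)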
Example #3
    def start_learning(self, env_manager: EnvManager,
                       trainer_config: Dict[str, Any]) -> None:
        # TODO: Should be able to start learning at different lesson numbers
        # for each curriculum.
        if self.meta_curriculum is not None:
            self.meta_curriculum.set_all_curriculums_to_lesson_num(self.lesson)
        self._create_model_path(self.model_path)

        tf.reset_default_graph()

        # Trainer initialization creates the TF sessions, configured so a
        # single session cannot take all GPU memory.
        self.initialize_trainers(trainer_config, env_manager.external_brains)
        for _, t in self.trainers.items():
            self.logger.info(t)

        global_step = 0

        if self.train_model:
            for brain_name, trainer in self.trainers.items():
                trainer.write_tensorboard_text("Hyperparameters",
                                               trainer.parameters)
        try:
            for brain_name, trainer in self.trainers.items():
                env_manager.set_policy(brain_name, trainer.policy)
            self._reset_env(env_manager)
            while self._not_done_training():
                n_steps = self.advance(env_manager)
                for _ in range(n_steps):
                    global_step += 1
                    self.reset_env_if_ready(env_manager, global_step)
                    if self._should_save_model(global_step):
                        # Save Tensorflow model
                        self._save_model()
                    self.write_to_tensorboard(global_step)
            # Final save Tensorflow model
            if global_step != 0 and self.train_model:
                self._save_model()
        except (KeyboardInterrupt, UnityCommunicationException):
            if self.train_model:
                self._save_model_when_interrupted()
        if self.train_model:
            self._write_training_metrics()
            self._export_graph()
        self._write_timing_tree()
        env_manager.close()

    def _reset_env(self, env: EnvManager) -> List[EnvironmentStep]:
        """Resets the environment.

        Returns:
            A data structure corresponding to the initial reset state of the
            environment.
        """
        sampled_reset_param = self.sampler_manager.sample_all()
        new_meta_curriculum_config = (self.meta_curriculum.get_config()
                                      if self.meta_curriculum else {})
        sampled_reset_param.update(new_meta_curriculum_config)
        return env.reset(config=sampled_reset_param)
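_reset_env() above merges two sources of reset parameters: generic values drawn from the sampler manager and the meta-curriculum's current lesson config. Because dict.update() is applied to the sampled values, the curriculum wins on any shared key. A small self-contained illustration of the merge semantics (the parameter names are invented for this example):

# Invented parameter names, only to show the merge semantics.
sampled_reset_param = {"gravity": 9.4, "scale": 1.1}   # from sampler_manager.sample_all()
curriculum_config = {"difficulty": 2, "scale": 0.5}    # from meta_curriculum.get_config()

sampled_reset_param.update(curriculum_config)
# The curriculum overrides the sampler on the shared "scale" key:
assert sampled_reset_param == {"gravity": 9.4, "scale": 0.5, "difficulty": 2}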
    def start_learning(self, env_manager: EnvManager) -> None:
        self._create_model_path(self.model_path)

        tf.reset_default_graph()

        for _, t in self.trainers.items():
            self.logger.info(t)

        global_step = 0

        if self.train_model:
            for brain_name, trainer in self.trainers.items():
                trainer.write_tensorboard_text("Hyperparameters",
                                               trainer.parameters)
        try:
            for brain_name, trainer in self.trainers.items():
                env_manager.set_policy(brain_name, trainer.policy)
            self._reset_env(env_manager)
            while self._not_done_training():
                n_steps = self.advance(env_manager)
                for _ in range(n_steps):
                    global_step += 1
                    self.reset_env_if_ready(env_manager, global_step)
                    if self._should_save_model(global_step):
                        # Save Tensorflow model
                        self._save_model()
                    self.write_to_tensorboard(global_step)
            # Final save Tensorflow model
            if global_step != 0 and self.train_model:
                self._save_model()
        except (KeyboardInterrupt, UnityCommunicationException):
            if self.train_model:
                self._save_model_when_interrupted()
        if self.train_model:
            self._write_training_metrics()
            self._export_graph()
        self._write_timing_tree()
        env_manager.close()
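Both variants gate checkpointing on _should_save_model(global_step), whose body is not shown on this page. A plausible sketch, assuming a save_freq attribute on the controller (hypothetical here; check the real TrainerController for the actual condition):

def _should_save_model(self, global_step: int) -> bool:
    # Hypothetical: save every save_freq steps, but never at step 0
    # and only when a model is actually being trained.
    return (
        self.train_model
        and global_step != 0
        and global_step % self.save_freq == 0
    )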
Example #6
    def start_learning(self, env_manager: EnvManager) -> None:
        self._create_model_path(self.model_path)
        tf.reset_default_graph()
        global_step = 0
        last_brain_names: Set[str] = set()
        try:
            self._reset_env(env_manager)
            while self._not_done_training():
                # Detect brains that have appeared in the environment since the
                # last iteration and create a trainer for each one on the fly.
                external_brains = set(env_manager.external_brains.keys())
                new_brains = external_brains - last_brain_names
                if last_brain_names != env_manager.external_brains.keys():
                    for name in new_brains:
                        trainer = self.trainer_factory.generate(
                            env_manager.external_brains[name]
                        )
                        self.start_trainer(trainer, env_manager)
                    last_brain_names = external_brains
                n_steps = self.advance(env_manager)
                for _ in range(n_steps):
                    global_step += 1
                    self.reset_env_if_ready(env_manager, global_step)
                    if self._should_save_model(global_step):
                        # Save Tensorflow model
                        self._save_model()
                    self.write_to_tensorboard(global_step)
            # Final save Tensorflow model
            if global_step != 0 and self.train_model:
                self._save_model()
        except (KeyboardInterrupt, UnityCommunicationException):
            if self.train_model:
                self._save_model_when_interrupted()
        if self.train_model:
            self._write_training_metrics()
            self._export_graph()
        self._write_timing_tree()
        env_manager.close()
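Example #6 differs from the earlier variants by creating trainers on the fly instead of up front: each loop iteration diffs the environment's current brain names against the set seen so far, and only newly appeared brains get a trainer. The set arithmetic at its core can be verified in isolation:

# Standalone illustration of the new-brain detection above.
last_brain_names = {"Walker"}
external_brains = {"Walker", "Crawler"}   # a new brain appeared this step

new_brains = external_brains - last_brain_names
assert new_brains == {"Crawler"}          # only Crawler needs a fresh trainer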