Code Example #1
 def start_trainer(self, trainer: Trainer, env_manager: EnvManager) -> None:
     self.trainers[trainer.brain_name] = trainer
     self.logger.info(trainer)
     if self.train_model:
         trainer.write_tensorboard_text("Hyperparameters",
                                        trainer.parameters)
     env_manager.set_policy(trainer.brain_name, trainer.policy)
Code Example #2
    def _create_trainer_and_manager(self, env_manager: EnvManager,
                                    name_behavior_id: str) -> None:
        brain_name = BehaviorIdentifiers.from_name_behavior_id(
            name_behavior_id).brain_name
        try:
            trainer = self.trainers[brain_name]
        except KeyError:
            trainer = self.trainer_factory.generate(brain_name)
            self.trainers[brain_name] = trainer
            self.logger.info(trainer)
            if self.train_model:
                trainer.write_tensorboard_text("Hyperparameters",
                                               trainer.parameters)

        # print("*/*/*/*/*/*/*/*/*/*/*/*/*")
        # print(trainer)
        # print("*/*/*/*/*/*/*/*/*/*/*/*/*")
        policy = trainer.create_policy(
            env_manager.external_brains[name_behavior_id])
        trainer.add_policy(name_behavior_id, policy)
        agent_manager = AgentManager(
            policy,
            name_behavior_id,
            trainer.stats_reporter,
            trainer.parameters.get("time_horizon", sys.maxsize),
        )

        env_manager.set_agent_manager(name_behavior_id, agent_manager)
        env_manager.set_policy(name_behavior_id, policy)
        self.brain_name_to_identifier[brain_name].add(name_behavior_id)

        trainer.publish_policy_queue(agent_manager.policy_queue)
        trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)
Code Example #3
 def advance(self, env: EnvManager) -> int:
     with hierarchical_timer("env_step"):
         new_step_infos = env.step()
     for step_info in new_step_infos:
         for brain_name, trainer in self.trainers.items():
             if step_info.has_actions_for_brain(brain_name):
                 _processor = self.managers[brain_name].processor
                 _processor.add_experiences(
                     step_info.previous_all_brain_info[brain_name],
                     step_info.current_all_brain_info[brain_name],
                     step_info.brain_name_to_action_info[brain_name].
                     outputs,
                 )
     for brain_name, trainer in self.trainers.items():
         if self.train_model and trainer.get_step <= trainer.get_max_steps:
             trainer.increment_step(len(new_step_infos))
             if trainer.is_ready_update():
                 # Perform gradient descent with experience buffer
                 with hierarchical_timer("update_policy"):
                     trainer.update_policy()
                 env.set_policy(brain_name, trainer.policy)
         else:
             # Avoid memory leak during inference
             # Eventually this whole block will take place in advance()
             # But currently this only calls clear_update_buffer() in RLTrainer
             # and nothing in the base class
             trainer.advance()
     return len(new_step_infos)
Code Example #4
File: trainer_controller.py Project: wszhs/ml-agents
    def _create_trainer_and_manager(
        self, env_manager: EnvManager, name_behavior_id: str
    ) -> None:

        parsed_behavior_id = BehaviorIdentifiers.from_name_behavior_id(name_behavior_id)
        brain_name = parsed_behavior_id.brain_name
        try:
            trainer = self.trainers[brain_name]
        except KeyError:
            trainer = self.trainer_factory.generate(brain_name)
            self.trainers[brain_name] = trainer

        policy = trainer.create_policy(
            parsed_behavior_id, env_manager.external_brains[name_behavior_id]
        )
        trainer.add_policy(parsed_behavior_id, policy)

        agent_manager = AgentManager(
            policy,
            name_behavior_id,
            trainer.stats_reporter,
            trainer.parameters.get("time_horizon", sys.maxsize),
        )
        env_manager.set_agent_manager(name_behavior_id, agent_manager)
        env_manager.set_policy(name_behavior_id, policy)
        self.brain_name_to_identifier[brain_name].add(name_behavior_id)

        trainer.publish_policy_queue(agent_manager.policy_queue)
        trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)
Code Example #5
 def _get_and_process_experiences(self, env: EnvManager) -> int:
     with hierarchical_timer("env_step"):
         # Get new policies if found
         for brain_name in self.trainers.keys():
             for name_behavior_id in self.brain_name_to_identifier[
                     brain_name]:
                 try:
                     _policy = self.managers[
                         name_behavior_id].policy_queue.get_nowait()
                     env.set_policy(name_behavior_id, _policy)
                 except AgentManagerQueue.Empty:
                     pass
         # Step the environment
         new_step_infos = env.step()
     # Add to AgentProcessor
     for step_info in new_step_infos:
         for name_behavior_id in step_info.name_behavior_ids:
             if name_behavior_id not in self.managers:
                 self.logger.warning(
                     "Agent manager was not created for behavior id {}.".
                     format(name_behavior_id))
                 continue
             self.managers[name_behavior_id].add_experiences(
                 step_info.previous_all_brain_info[name_behavior_id],
                 step_info.current_all_brain_info[name_behavior_id],
                 step_info.brain_name_to_action_info[name_behavior_id].
                 outputs,
             )
     return len(new_step_infos)
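Example #5 shows only the consumer side of the policy queue: the controller polls each AgentManager's policy_queue with get_nowait() and, when a new policy arrives, installs it on the EnvManager. The producer side lives in the trainer, which received the same queue through publish_policy_queue (Examples #2 and #4). A minimal sketch of that hand-off, assuming the trainer keeps the published queues in a policy_queues list and that each queue exposes the behavior id it serves (both assumptions, not shown in these excerpts):

    def publish_policy_queue(self, policy_queue: AgentManagerQueue) -> None:
        # Remember every policy queue an AgentManager registered with this trainer.
        self.policy_queues.append(policy_queue)

    def _publish_updated_policies(self) -> None:
        # Hypothetical helper: after update_policy(), push the fresh policy to every
        # subscriber so the controller's get_nowait() loop in Example #5 picks it up.
        for queue in self.policy_queues:
            queue.put(self.get_policy(queue.behavior_id))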
Code Example #6
File: trainer_controller.py Project: jaklw/ml-agents
    def _create_trainer_and_manager(self, env_manager: EnvManager,
                                    name_behavior_id: str) -> None:

        parsed_behavior_id = BehaviorIdentifiers.from_name_behavior_id(
            name_behavior_id)
        brain_name = parsed_behavior_id.brain_name
        try:
            trainer = self.trainers[brain_name]
        except KeyError:
            trainer = self.trainer_factory.generate(brain_name)
            self.trainers[brain_name] = trainer

        policy = trainer.create_policy(
            parsed_behavior_id, env_manager.external_brains[name_behavior_id])
        trainer.add_policy(parsed_behavior_id, policy)

        agent_manager = AgentManager(
            policy,
            name_behavior_id,
            trainer.stats_reporter,
            trainer.parameters.get("time_horizon", sys.maxsize),
        )
        env_manager.set_agent_manager(name_behavior_id, agent_manager)
        env_manager.set_policy(name_behavior_id, policy)
        self.brain_name_to_identifier[brain_name].add(name_behavior_id)

        trainer.publish_policy_queue(agent_manager.policy_queue)
        trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)
        if trainer.threaded:
            # Start trainer thread
            trainerthread = threading.Thread(target=self.trainer_update_func,
                                             args=(trainer, ),
                                             daemon=True)
            trainerthread.start()
            self.trainer_threads.append(trainerthread)
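Example #6 (and #7 below) passes self.trainer_update_func as the thread target but does not show its body. A minimal sketch of what that per-trainer update loop can look like, assuming the controller keeps a kill_trainers flag that it sets when training shuts down (the flag is an assumption, not part of these excerpts):

    def trainer_update_func(self, trainer: Trainer) -> None:
        # Run the trainer's advance() loop on its own thread: drain trajectory
        # queues and update the policy until shutdown is signalled.
        while not self.kill_trainers:
            with hierarchical_timer("trainer_advance"):
                trainer.advance()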
Code Example #7
    def _create_trainer_and_manager(
        self, env_manager: EnvManager, name_behavior_id: str
    ) -> None:

        parsed_behavior_id = BehaviorIdentifiers.from_name_behavior_id(name_behavior_id)
        brain_name = parsed_behavior_id.brain_name
        trainerthread = None
        if brain_name in self.trainers:
            trainer = self.trainers[brain_name]
        else:
            trainer = self.trainer_factory.generate(brain_name)
            self.trainers[brain_name] = trainer
            if trainer.threaded:
                # Only create trainer thread for new trainers
                trainerthread = threading.Thread(
                    target=self.trainer_update_func, args=(trainer,), daemon=True
                )
                self.trainer_threads.append(trainerthread)
            env_manager.on_training_started(
                brain_name, self.trainer_factory.trainer_config[brain_name]
            )

        policy = trainer.create_policy(
            parsed_behavior_id,
            env_manager.training_behaviors[name_behavior_id],
            create_graph=True,
        )
        trainer.add_policy(parsed_behavior_id, policy)

        agent_manager = AgentManager(
            policy,
            name_behavior_id,
            trainer.stats_reporter,
            trainer.parameters.time_horizon,
            threaded=trainer.threaded,
        )
        env_manager.set_agent_manager(name_behavior_id, agent_manager)
        env_manager.set_policy(name_behavior_id, policy)
        self.brain_name_to_identifier[brain_name].add(name_behavior_id)

        trainer.publish_policy_queue(agent_manager.policy_queue)
        trainer.subscribe_trajectory_queue(agent_manager.trajectory_queue)

        # Only start new trainers
        if trainerthread is not None:
            trainerthread.start()
Code Example #8
 def _get_and_process_experiences(self, env: EnvManager) -> int:
     with hierarchical_timer("env_step"):
         # Get new policies if found
         for brain_name in self.trainers.keys():
             for name_behavior_id in self.brain_name_to_identifier[
                     brain_name]:
                 try:
                     _policy = self.managers[
                         name_behavior_id].policy_queue.get_nowait()
                     env.set_policy(name_behavior_id, _policy)
                 except AgentManagerQueue.Empty:
                     pass
         # Step the environment
         new_step_infos = env.step()
     # Add to AgentProcessor
     num_step_infos = self._process_step_infos(new_step_infos)
     return num_step_infos
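Example #8 factors the per-step bookkeeping out into _process_step_infos, whose body is not shown; it presumably contains the same routing loop that Example #5 still spells out inline. A sketch under that assumption, reusing the field names visible in Example #5:

    def _process_step_infos(self, step_infos) -> int:
        # Route each step's observations and action outputs to the AgentManager
        # that owns the corresponding behavior id, skipping unknown behaviors.
        for step_info in step_infos:
            for name_behavior_id in step_info.name_behavior_ids:
                if name_behavior_id not in self.managers:
                    self.logger.warning(
                        "Agent manager was not created for behavior id {}.".format(
                            name_behavior_id))
                    continue
                self.managers[name_behavior_id].add_experiences(
                    step_info.previous_all_brain_info[name_behavior_id],
                    step_info.current_all_brain_info[name_behavior_id],
                    step_info.brain_name_to_action_info[name_behavior_id].outputs,
                )
        return len(step_infos)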
Code Example #9
    def advance(self, env: EnvManager) -> int:
        with hierarchical_timer("env_step"):
            new_step_infos = env.step()
        for step_info in new_step_infos:
            for name_behavior_id in step_info.name_behavior_ids:
                if name_behavior_id not in self.managers:
                    self.logger.warning(
                        "Agent manager was not created for behavior id {}.".
                        format(name_behavior_id))
                    continue
                _processor = self.managers[name_behavior_id].processor
                _processor.add_experiences(
                    step_info.previous_all_brain_info[name_behavior_id],
                    step_info.current_all_brain_info[name_behavior_id],
                    step_info.brain_name_to_action_info[name_behavior_id].
                    outputs,
                )

        for brain_name, trainer in self.trainers.items():
            if self.train_model and trainer.get_step <= trainer.get_max_steps:
                n_steps = len(new_step_infos)
                trainer.increment_step(n_steps)
                for name_behavior_id in self.brain_name_to_identifier[
                        brain_name]:
                    trainer.get_policy(name_behavior_id).increment_step(
                        n_steps)
                if trainer.is_ready_update():
                    # Perform gradient descent with experience buffer
                    with hierarchical_timer("update_policy"):
                        trainer.update_policy()
                    for name_behavior_id in self.brain_name_to_identifier[
                            brain_name]:
                        env.set_policy(name_behavior_id,
                                       trainer.get_policy(name_behavior_id))
            else:
                # Avoid memory leak during inference
                # Eventually this whole block will take place in advance()
                # But currently this only calls clear_update_buffer() in RLTrainer
                # and nothing in the base class
                trainer.advance()
        return len(new_step_infos)
Code Example #10
 def advance(self, env: EnvManager) -> int:
     with hierarchical_timer("env_step"):
         time_start_step = time()
         new_step_infos = env.step()
         delta_time_step = time() - time_start_step
     for step_info in new_step_infos:
         for brain_name, trainer in self.trainers.items():
             if brain_name in self.trainer_metrics:
                 self.trainer_metrics[brain_name].add_delta_step(
                     delta_time_step)
             if step_info.has_actions_for_brain(brain_name):
                 trainer.add_experiences(
                     step_info.previous_all_brain_info[brain_name],
                     step_info.current_all_brain_info[brain_name],
                     step_info.brain_name_to_action_info[brain_name].
                     outputs,
                 )
                 trainer.process_experiences(
                     step_info.previous_all_brain_info[brain_name],
                     step_info.current_all_brain_info[brain_name],
                 )
     for brain_name, trainer in self.trainers.items():
         if brain_name in self.trainer_metrics:
             self.trainer_metrics[brain_name].add_delta_step(
                 delta_time_step)
         if self.train_model and trainer.get_step <= trainer.get_max_steps:
             trainer.increment_step(len(new_step_infos))
             if trainer.is_ready_update():
                 # Perform gradient descent with experience buffer
                 with hierarchical_timer("update_policy"):
                     trainer.update_policy()
                 env.set_policy(brain_name, trainer.policy)
         else:
             # Avoid memory leak during inference
             trainer.clear_update_buffer()
     return len(new_step_infos)
Code Example #11
    def start_learning(self, env_manager: EnvManager) -> None:
        self._create_model_path(self.model_path)
        tf.reset_default_graph()
        global_step = 0
        last_brain_behavior_ids: Set[str] = set()
        try:
            self._reset_env(env_manager)
            while self._not_done_training():
                external_brain_behavior_ids = set(
                    env_manager.external_brains.keys())
                new_behavior_ids = external_brain_behavior_ids - last_brain_behavior_ids
                for name_behavior_id in new_behavior_ids:
                    try:
                        brain_name, _ = name_behavior_id.split("?")
                    except ValueError:
                        brain_name = name_behavior_id

                    try:
                        trainer = self.trainers[brain_name]
                    except KeyError:
                        trainer = self.trainer_factory.generate(brain_name)
                        self.trainers[brain_name] = trainer
                        self.logger.info(trainer)
                        if self.train_model:
                            trainer.write_tensorboard_text(
                                "Hyperparameters", trainer.parameters)

                    policy = trainer.create_policy(
                        env_manager.external_brains[name_behavior_id])
                    trainer.add_policy(name_behavior_id, policy)

                    env_manager.set_policy(name_behavior_id, policy)

                    self.brain_name_to_identifier[brain_name].add(
                        name_behavior_id)

                    agent_manager = AgentManager(processor=AgentProcessor(
                        trainer,
                        policy,
                        name_behavior_id,
                        trainer.stats_reporter,
                        trainer.parameters.get("time_horizon", sys.maxsize),
                    ))
                    self.managers[name_behavior_id] = agent_manager

                last_brain_behavior_ids = external_brain_behavior_ids

                n_steps = self.advance(env_manager)
                for _ in range(n_steps):
                    global_step += 1
                    self.reset_env_if_ready(env_manager, global_step)
                    if self._should_save_model(global_step):
                        # Save Tensorflow model
                        self._save_model()
                    self.write_to_tensorboard(global_step)
            # Final save Tensorflow model
            if global_step != 0 and self.train_model:
                self._save_model()
        except (KeyboardInterrupt, UnityCommunicationException):
            if self.train_model:
                self._save_model_when_interrupted()
            pass
        if self.train_model:
            self._export_graph()
        self._write_timing_tree()