Beispiel #1
0
 def initialize_trainers(self, trainer_config: Dict[str, Dict[str, str]]):
     """
     Initialization of the trainers.

     One trainer is created for every brain in ``self.external_brains`` and
     stored in ``self.trainers`` under the brain's name.  The parameters of
     a brain start as a shallow copy of ``trainer_config['default']``, get
     the per-run ``summary_path``, ``model_path`` and ``keep_checkpoints``
     entries, and are finally overlaid with the brain's own section of
     ``trainer_config`` when one exists.  A brain entry that is not a dict
     is treated as the key of another entry and followed until a dict is
     reached.

     :param trainer_config: The configurations of the trainers. Must contain
         a ``'default'`` section; may contain one section (or alias) per
         brain name.
     :raises UnityEnvironmentException: If the resolved ``trainer`` value of
         a brain is not ``offline_bc``, ``online_bc`` or ``ppo``, or if the
         alias entries of ``trainer_config`` refer to each other in a cycle.
     """
     # Phase 1: resolve the parameters of every brain before building any
     # trainer, so a broken configuration is detected up front.
     trainer_parameters_dict = {}
     for brain_name in self.external_brains:
         trainer_parameters = trainer_config['default'].copy()
         trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
             basedir=self.summaries_dir,
             name=str(self.run_id) + '_' + brain_name)
         trainer_parameters['model_path'] = '{basedir}/{name}'.format(
             basedir=self.model_path, name=brain_name)
         trainer_parameters['keep_checkpoints'] = self.keep_checkpoints
         if brain_name in trainer_config:
             _brain_key = brain_name
             # Remember every alias already followed: without this a
             # self-referential or circular alias would loop forever.
             followed_aliases = set()
             while not isinstance(trainer_config[_brain_key], dict):
                 if _brain_key in followed_aliases:
                     raise UnityEnvironmentException(
                         'The trainer config contains a circular alias '
                         'for brain {}'.format(brain_name))
                 followed_aliases.add(_brain_key)
                 _brain_key = trainer_config[_brain_key]
             # Brain-specific values win over the defaults set above.
             trainer_parameters.update(trainer_config[_brain_key])
         trainer_parameters_dict[brain_name] = trainer_parameters

     # Phase 2: instantiate the trainer class selected by the 'trainer' key.
     for brain_name in self.external_brains:
         brain = self.external_brains[brain_name]
         parameters = trainer_parameters_dict[brain_name]
         trainer_type = parameters['trainer']
         if trainer_type == 'offline_bc':
             self.trainers[brain_name] = OfflineBCTrainer(
                 brain, parameters, self.train_model,
                 self.load_model, self.seed, self.run_id)
         elif trainer_type == 'online_bc':
             self.trainers[brain_name] = OnlineBCTrainer(
                 brain, parameters, self.train_model,
                 self.load_model, self.seed, self.run_id)
         elif trainer_type == 'ppo':
             # NOTE(review): 0 is passed when there is no meta curriculum,
             # whereas other versions of this function in this file pass 1
             # - confirm which value PPOTrainer expects.
             if self.meta_curriculum:
                 min_lesson_length = self.meta_curriculum.brains_to_curriculums[
                     brain_name].min_lesson_length
             else:
                 min_lesson_length = 0
             self.trainers[brain_name] = PPOTrainer(
                 brain, min_lesson_length, parameters, self.train_model,
                 self.load_model, self.seed, self.run_id)
             # Only the PPO trainer's metrics are registered here.
             self.trainer_metrics[brain_name] = self.trainers[
                 brain_name].trainer_metrics
         else:
             raise UnityEnvironmentException('The trainer config contains '
                                             'an unknown trainer type for '
                                             'brain {}'.format(brain_name))
def initialize_trainers(
    trainer_config: Dict[str, Any],
    external_brains: Dict[str, BrainParameters],
    summaries_dir: str,
    run_id: str,
    model_path: str,
    keep_checkpoints: int,
    train_model: bool,
    load_model: bool,
    seed: int,
    meta_curriculum: MetaCurriculum = None,
    multi_gpu: bool = False,
) -> Dict[str, Trainer]:
    """
    Initializes trainers given a provided trainer configuration and set of brains from the environment, as well as
    some general training session options.

    The parameters of each brain start as a shallow copy of ``trainer_config["default"]``, receive the per-run
    ``summary_path``, ``model_path`` and ``keep_checkpoints`` entries, and are then overlaid with the brain's own
    section of ``trainer_config`` when one exists. A brain entry that is not a dict is treated as the key of
    another entry and followed until a dict is reached.

    :param trainer_config: Original trainer configuration loaded from YAML
    :param external_brains: BrainParameters provided by the Unity environment
    :param summaries_dir: Directory to store trainer summary statistics
    :param run_id: Run ID to associate with this training run
    :param model_path: Path to save the model
    :param keep_checkpoints: How many model checkpoints to keep
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param seed: The random seed to use
    :param meta_curriculum: Optional meta_curriculum; when given, each brain's ``min_lesson_length`` is passed to
        PPOTrainer / SACTrainer, otherwise 1 is passed
    :param multi_gpu: Whether to use multi-GPU training (forwarded to PPOTrainer only)
    :return: Dictionary mapping each brain name to the trainer created for it
    :raises UnityEnvironmentException: If the resolved ``trainer`` value of a brain is not ``offline_bc``,
        ``online_bc``, ``ppo`` or ``sac``, or if the alias entries of ``trainer_config`` refer to each other
        in a cycle
    """
    # Phase 1: resolve the parameters of every brain before building any trainer.
    trainer_parameters_dict = {}
    for brain_name in external_brains:
        trainer_parameters = trainer_config["default"].copy()
        trainer_parameters["summary_path"] = "{basedir}/{name}".format(
            basedir=summaries_dir, name=str(run_id) + "_" + brain_name)
        trainer_parameters["model_path"] = "{basedir}/{name}".format(
            basedir=model_path, name=brain_name)
        trainer_parameters["keep_checkpoints"] = keep_checkpoints
        if brain_name in trainer_config:
            _brain_key: Any = brain_name
            # Remember every alias already followed: without this a
            # self-referential or circular alias would loop forever.
            followed_aliases = set()
            while not isinstance(trainer_config[_brain_key], dict):
                if _brain_key in followed_aliases:
                    raise UnityEnvironmentException(
                        "The trainer config contains a circular alias "
                        "for brain {}".format(brain_name))
                followed_aliases.add(_brain_key)
                _brain_key = trainer_config[_brain_key]
            # Brain-specific values win over the defaults set above.
            trainer_parameters.update(trainer_config[_brain_key])
        trainer_parameters_dict[brain_name] = trainer_parameters

    # Phase 2: instantiate the trainer class selected by the "trainer" key.
    trainers = {}
    for brain_name in external_brains:
        brain = external_brains[brain_name]
        parameters = trainer_parameters_dict[brain_name]
        trainer_type = parameters["trainer"]
        if trainer_type == "offline_bc":
            trainers[brain_name] = OfflineBCTrainer(
                brain, parameters, train_model, load_model, seed, run_id)
        elif trainer_type == "online_bc":
            trainers[brain_name] = OnlineBCTrainer(
                brain, parameters, train_model, load_model, seed, run_id)
        elif trainer_type == "ppo" or trainer_type == "sac":
            # Both trainers take the curriculum's minimum lesson length as
            # their second argument; 1 is used when there is no curriculum.
            if meta_curriculum:
                min_lesson_length = meta_curriculum.brains_to_curriculums[
                    brain_name].min_lesson_length
            else:
                min_lesson_length = 1
            if trainer_type == "ppo":
                trainers[brain_name] = PPOTrainer(
                    brain,
                    min_lesson_length,
                    parameters,
                    train_model,
                    load_model,
                    seed,
                    run_id,
                    multi_gpu,
                )
            else:
                trainers[brain_name] = SACTrainer(
                    brain,
                    min_lesson_length,
                    parameters,
                    train_model,
                    load_model,
                    seed,
                    run_id,
                )
        else:
            raise UnityEnvironmentException("The trainer config contains "
                                            "an unknown trainer type for "
                                            "brain {}".format(brain_name))
    return trainers
 def initialize_trainers(self, trainer_config: Dict[str, Dict[str, str]]):
     """
     Initialization of the trainers.

     One trainer is created for every brain in ``self.external_brains`` and
     stored in ``self.trainers`` under the brain's name.  The parameters of
     a brain start as a shallow copy of ``trainer_config["default"]``, get
     the per-run ``summary_path``, ``model_path`` and ``keep_checkpoints``
     entries, and are finally overlaid with the brain's own section of
     ``trainer_config`` when one exists.  A brain entry that is not a dict
     is treated as the key of another entry and followed until a dict is
     reached.

     :param trainer_config: The configurations of the trainers. Must contain
         a ``"default"`` section; may contain one section (or alias) per
         brain name.
     :raises UnityEnvironmentException: If the resolved ``trainer`` value of
         a brain is not ``offline_bc``, ``online_bc`` or ``ppo``, or if the
         alias entries of ``trainer_config`` refer to each other in a cycle.
     """
     # Phase 1: resolve the parameters of every brain before building any
     # trainer, so a broken configuration is detected up front.
     trainer_parameters_dict = {}
     for brain_name in self.external_brains:
         trainer_parameters = trainer_config["default"].copy()
         trainer_parameters["summary_path"] = "{basedir}/{name}".format(
             basedir=self.summaries_dir, name=str(self.run_id) + "_" + brain_name
         )
         trainer_parameters["model_path"] = "{basedir}/{name}".format(
             basedir=self.model_path, name=brain_name
         )
         trainer_parameters["keep_checkpoints"] = self.keep_checkpoints
         if brain_name in trainer_config:
             _brain_key = brain_name
             # Remember every alias already followed: without this a
             # self-referential or circular alias would loop forever.
             followed_aliases = set()
             while not isinstance(trainer_config[_brain_key], dict):
                 if _brain_key in followed_aliases:
                     raise UnityEnvironmentException(
                         "The trainer config contains a circular alias "
                         "for brain {}".format(brain_name)
                     )
                 followed_aliases.add(_brain_key)
                 _brain_key = trainer_config[_brain_key]
             # Brain-specific values win over the defaults set above.
             trainer_parameters.update(trainer_config[_brain_key])
         trainer_parameters_dict[brain_name] = trainer_parameters

     # Phase 2: instantiate the trainer class selected by the "trainer" key.
     for brain_name in self.external_brains:
         brain = self.external_brains[brain_name]
         parameters = trainer_parameters_dict[brain_name]
         trainer_type = parameters["trainer"]
         if trainer_type == "offline_bc":
             self.trainers[brain_name] = OfflineBCTrainer(
                 brain,
                 parameters,
                 self.train_model,
                 self.load_model,
                 self.seed,
                 self.run_id,
             )
         elif trainer_type == "online_bc":
             self.trainers[brain_name] = OnlineBCTrainer(
                 brain,
                 parameters,
                 self.train_model,
                 self.load_model,
                 self.seed,
                 self.run_id,
             )
         elif trainer_type == "ppo":
             # NOTE(review): 0 is passed when there is no meta curriculum,
             # whereas other versions of this function in this file pass 1
             # - confirm which value PPOTrainer expects.
             if self.meta_curriculum:
                 min_lesson_length = self.meta_curriculum.brains_to_curriculums[
                     brain_name
                 ].min_lesson_length
             else:
                 min_lesson_length = 0
             self.trainers[brain_name] = PPOTrainer(
                 brain,
                 min_lesson_length,
                 parameters,
                 self.train_model,
                 self.load_model,
                 self.seed,
                 self.run_id,
             )
             # Only the PPO trainer's metrics are registered here.
             self.trainer_metrics[brain_name] = self.trainers[
                 brain_name
             ].trainer_metrics
         else:
             raise UnityEnvironmentException(
                 "The trainer config contains "
                 "an unknown trainer type for "
                 "brain {}".format(brain_name)
             )
Beispiel #4
0
    def initialize_trainers(
        self,
        trainer_config: Dict[str, Any],
        external_brains: Dict[str, BrainParameters],
    ) -> None:
        """
        Initialization of the trainers.

        One trainer is created for every brain in ``external_brains`` and
        stored in ``self.trainers`` under the brain's name.  The parameters
        of a brain start as a shallow copy of ``trainer_config["default"]``,
        get the per-run ``summary_path``, ``model_path`` and
        ``keep_checkpoints`` entries, and are finally overlaid with the
        brain's own section of ``trainer_config`` when one exists.  A brain
        entry that is not a dict is treated as the key of another entry and
        followed until a dict is reached.

        :param trainer_config: The configurations of the trainers. Must
            contain a ``"default"`` section; may contain one section (or
            alias) per brain name.
        :param external_brains: Mapping from brain name to the brain object
            handed to the trainer constructor.
        :raises UnityEnvironmentException: If the resolved ``trainer`` value
            of a brain is not ``offline_bc``, ``online_bc`` or ``ppo``, or if
            the alias entries of ``trainer_config`` refer to each other in a
            cycle.
        """
        # Phase 1: resolve the parameters of every brain before building any
        # trainer, so a broken configuration is detected up front.
        trainer_parameters_dict = {}
        for brain_name in external_brains:
            trainer_parameters = trainer_config["default"].copy()
            trainer_parameters["summary_path"] = "{basedir}/{name}".format(
                basedir=self.summaries_dir,
                name=str(self.run_id) + "_" + brain_name)
            trainer_parameters["model_path"] = "{basedir}/{name}".format(
                basedir=self.model_path, name=brain_name)
            trainer_parameters["keep_checkpoints"] = self.keep_checkpoints
            if brain_name in trainer_config:
                _brain_key: Any = brain_name
                # Remember every alias already followed: without this a
                # self-referential or circular alias would loop forever.
                followed_aliases = set()
                while not isinstance(trainer_config[_brain_key], dict):
                    if _brain_key in followed_aliases:
                        raise UnityEnvironmentException(
                            "The trainer config contains a circular alias "
                            "for brain {}".format(brain_name))
                    followed_aliases.add(_brain_key)
                    _brain_key = trainer_config[_brain_key]
                # Brain-specific values win over the defaults set above.
                trainer_parameters.update(trainer_config[_brain_key])
            trainer_parameters_dict[brain_name] = trainer_parameters

        # Phase 2: instantiate the trainer class selected by the "trainer" key.
        for brain_name in external_brains:
            brain = external_brains[brain_name]
            parameters = trainer_parameters_dict[brain_name]
            trainer_type = parameters["trainer"]
            if trainer_type == "offline_bc":
                self.trainers[brain_name] = OfflineBCTrainer(
                    brain=brain,
                    trainer_parameters=parameters,
                    training=self.train_model,
                    load=self.load_model,
                    seed=self.seed,
                    run_id=self.run_id,
                )
            elif trainer_type == "online_bc":
                self.trainers[brain_name] = OnlineBCTrainer(
                    brain=brain,
                    trainer_parameters=parameters,
                    training=self.train_model,
                    load=self.load_model,
                    seed=self.seed,
                    run_id=self.run_id,
                )
            elif trainer_type == "ppo":
                # Find lesson length based on the form of learning: the
                # curriculum's minimum lesson length when a meta curriculum
                # is set, otherwise 1.
                if self.meta_curriculum:
                    lesson_length = self.meta_curriculum.brains_to_curriculums[
                        brain_name].min_lesson_length
                else:
                    lesson_length = 1

                self.trainers[brain_name] = PPOTrainer(
                    brain=brain,
                    reward_buff_cap=lesson_length,
                    trainer_parameters=parameters,
                    training=self.train_model,
                    load=self.load_model,
                    seed=self.seed,
                    run_id=self.run_id,
                )
                # Only the PPO trainer's metrics are registered here.
                self.trainer_metrics[brain_name] = self.trainers[
                    brain_name].trainer_metrics
            else:
                raise UnityEnvironmentException("The trainer config contains "
                                                "an unknown trainer type for "
                                                "brain {}".format(brain_name))