def initialize_trainers(self, trainer_config: Dict[str, Dict[str, str]]):
    """
    Build one trainer per external brain and register it on this object.

    For every brain in ``self.external_brains`` the parameters start as a
    copy of ``trainer_config['default']``, gain per-brain ``summary_path``,
    ``model_path`` and ``keep_checkpoints`` entries, and are then overridden
    by the brain's own section of ``trainer_config`` when one exists. A
    section whose value is not a dict is treated as an alias naming another
    section; aliases may be chained.

    Created trainers are stored in ``self.trainers``. For PPO trainers the
    trainer's ``trainer_metrics`` is also stored in ``self.trainer_metrics``.

    :param trainer_config: The configurations of the trainers, keyed by
        section name. The ``'default'`` section is read for every brain.
    :raises UnityEnvironmentException: If a brain's aliases refer back to
        each other, or if the resolved ``'trainer'`` type is not one of
        ``'offline_bc'``, ``'online_bc'`` or ``'ppo'``.
    """
    import logging

    # Diagnostics that used to be unconditional print() calls; kept at debug
    # level so they no longer pollute stdout during normal runs.
    logger = logging.getLogger(__name__)
    logger.debug("External brains: %s", self.external_brains)

    # First pass: resolve the full parameter set for every brain, so that a
    # configuration error surfaces before any trainer is constructed.
    trainer_parameters_dict = {}
    for brain_name in self.external_brains:
        trainer_parameters = trainer_config['default'].copy()
        trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
            basedir=self.summaries_dir,
            name=str(self.run_id) + '_' + brain_name)
        trainer_parameters['model_path'] = '{basedir}/{name}'.format(
            basedir=self.model_path,
            name=brain_name)
        trainer_parameters['keep_checkpoints'] = self.keep_checkpoints
        if brain_name in trainer_config:
            _brain_key = brain_name
            # Remember every section visited so that aliases pointing back
            # at each other raise instead of looping forever.
            visited_keys = {_brain_key}
            while not isinstance(trainer_config[_brain_key], dict):
                _brain_key = trainer_config[_brain_key]
                if _brain_key in visited_keys:
                    raise UnityEnvironmentException(
                        'The trainer config contains a circular alias '
                        'for brain {}'.format(brain_name))
                visited_keys.add(_brain_key)
            trainer_parameters.update(trainer_config[_brain_key])
        trainer_parameters_dict[brain_name] = trainer_parameters.copy()

    # Second pass: instantiate the trainer class selected by 'trainer'.
    for brain_name, brain_parameters in self.external_brains.items():
        trainer_parameters = trainer_parameters_dict[brain_name]
        trainer_type = trainer_parameters['trainer']
        if trainer_type == 'offline_bc':
            self.trainers[brain_name] = OfflineBCTrainer(
                brain_parameters,
                trainer_parameters,
                self.train_model, self.load_model, self.seed,
                self.run_id)
        elif trainer_type == 'online_bc':
            self.trainers[brain_name] = OnlineBCTrainer(
                brain_parameters,
                trainer_parameters,
                self.train_model, self.load_model, self.seed,
                self.run_id)
        elif trainer_type == 'ppo':
            logger.debug("Creating PPO trainer for brain parameters: %s",
                         brain_parameters)
            # Without a meta curriculum there is no lesson length; 0 is
            # passed in that case.
            if self.meta_curriculum:
                lesson_length = self.meta_curriculum.brains_to_curriculums[
                    brain_name].min_lesson_length
            else:
                lesson_length = 0
            self.trainers[brain_name] = PPOTrainer(
                brain_parameters,
                lesson_length,
                trainer_parameters,
                self.train_model, self.load_model, self.seed,
                self.run_id)
            self.trainer_metrics[brain_name] = self.trainers[
                brain_name].trainer_metrics
        else:
            raise UnityEnvironmentException('The trainer config contains '
                                            'an unknown trainer type for '
                                            'brain {}'.format(brain_name))
def initialize_trainers(
    trainer_config: Dict[str, Any],
    external_brains: Dict[str, BrainParameters],
    summaries_dir: str,
    run_id: str,
    model_path: str,
    keep_checkpoints: int,
    train_model: bool,
    load_model: bool,
    seed: int,
    meta_curriculum: MetaCurriculum = None,
    multi_gpu: bool = False,
) -> Dict[str, Trainer]:
    """
    Initializes trainers given a provided trainer configuration and set of
    brains from the environment, as well as some general training session
    options.

    For every brain the parameters start as a copy of
    ``trainer_config["default"]``, gain per-brain ``summary_path``,
    ``model_path`` and ``keep_checkpoints`` entries, and are then overridden
    by the brain's own section of ``trainer_config`` when one exists. A
    section whose value is not a dict is treated as an alias naming another
    section; aliases may be chained.

    :param trainer_config: Original trainer configuration loaded from YAML
    :param external_brains: BrainParameters provided by the Unity environment
    :param summaries_dir: Directory to store trainer summary statistics
    :param run_id: Run ID to associate with this training run
    :param model_path: Path to save the model
    :param keep_checkpoints: How many model checkpoints to keep
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param seed: The random seed to use
    :param meta_curriculum: Optional meta_curriculum, used to determine a
        reward buffer length for PPOTrainer
    :param multi_gpu: Whether to use multi-GPU training
    :return: A dictionary mapping each brain name to the trainer created
        for it.
    :raises UnityEnvironmentException: If a brain's aliases refer back to
        each other, or if the resolved ``"trainer"`` type is not one of
        ``"offline_bc"``, ``"online_bc"``, ``"ppo"`` or ``"sac"``.
    """
    # First pass: resolve the full parameter set for every brain, so that a
    # configuration error surfaces before any trainer is constructed.
    trainer_parameters_dict = {}
    for brain_name in external_brains:
        trainer_parameters = trainer_config["default"].copy()
        trainer_parameters["summary_path"] = "{basedir}/{name}".format(
            basedir=summaries_dir, name=str(run_id) + "_" + brain_name)
        trainer_parameters["model_path"] = "{basedir}/{name}".format(
            basedir=model_path, name=brain_name)
        trainer_parameters["keep_checkpoints"] = keep_checkpoints
        if brain_name in trainer_config:
            _brain_key: Any = brain_name
            # Remember every section visited so that aliases pointing back
            # at each other raise instead of looping forever.
            visited_keys = {_brain_key}
            while not isinstance(trainer_config[_brain_key], dict):
                _brain_key = trainer_config[_brain_key]
                if _brain_key in visited_keys:
                    raise UnityEnvironmentException(
                        "The trainer config contains a circular alias "
                        "for brain {}".format(brain_name))
                visited_keys.add(_brain_key)
            trainer_parameters.update(trainer_config[_brain_key])
        trainer_parameters_dict[brain_name] = trainer_parameters.copy()

    # Second pass: instantiate the trainer class selected by "trainer".
    trainers = {}
    for brain_name, brain_parameters in external_brains.items():
        trainer_parameters = trainer_parameters_dict[brain_name]
        trainer_type = trainer_parameters["trainer"]
        if trainer_type == "offline_bc":
            trainers[brain_name] = OfflineBCTrainer(
                brain_parameters,
                trainer_parameters,
                train_model,
                load_model,
                seed,
                run_id,
            )
        elif trainer_type == "online_bc":
            trainers[brain_name] = OnlineBCTrainer(
                brain_parameters,
                trainer_parameters,
                train_model,
                load_model,
                seed,
                run_id,
            )
        elif trainer_type == "ppo":
            # The curriculum is only consulted for trainers that need a
            # lesson length; 1 is used when no meta curriculum is given.
            if meta_curriculum:
                lesson_length = meta_curriculum.brains_to_curriculums[
                    brain_name].min_lesson_length
            else:
                lesson_length = 1
            trainers[brain_name] = PPOTrainer(
                brain_parameters,
                lesson_length,
                trainer_parameters,
                train_model,
                load_model,
                seed,
                run_id,
                multi_gpu,
            )
        elif trainer_type == "sac":
            if meta_curriculum:
                lesson_length = meta_curriculum.brains_to_curriculums[
                    brain_name].min_lesson_length
            else:
                lesson_length = 1
            trainers[brain_name] = SACTrainer(
                brain_parameters,
                lesson_length,
                trainer_parameters,
                train_model,
                load_model,
                seed,
                run_id,
            )
        else:
            raise UnityEnvironmentException("The trainer config contains "
                                            "an unknown trainer type for "
                                            "brain {}".format(brain_name))
    return trainers
def initialize_trainers(self, trainer_config: Dict[str, Dict[str, str]]):
    """
    Build one trainer per external brain and register it on this object.

    For every brain in ``self.external_brains`` the parameters start as a
    copy of ``trainer_config["default"]``, gain per-brain ``summary_path``,
    ``model_path`` and ``keep_checkpoints`` entries, and are then overridden
    by the brain's own section of ``trainer_config`` when one exists. A
    section whose value is not a dict is treated as an alias naming another
    section; aliases may be chained.

    Created trainers are stored in ``self.trainers``. For PPO trainers the
    trainer's ``trainer_metrics`` is also stored in ``self.trainer_metrics``.

    :param trainer_config: The configurations of the trainers, keyed by
        section name. The ``"default"`` section is read for every brain.
    :raises UnityEnvironmentException: If a brain's aliases refer back to
        each other, or if the resolved ``"trainer"`` type is not one of
        ``"offline_bc"``, ``"online_bc"`` or ``"ppo"``.
    """
    # First pass: resolve the full parameter set for every brain, so that a
    # configuration error surfaces before any trainer is constructed.
    trainer_parameters_dict = {}
    for brain_name in self.external_brains:
        trainer_parameters = trainer_config["default"].copy()
        trainer_parameters["summary_path"] = "{basedir}/{name}".format(
            basedir=self.summaries_dir, name=str(self.run_id) + "_" + brain_name
        )
        trainer_parameters["model_path"] = "{basedir}/{name}".format(
            basedir=self.model_path, name=brain_name
        )
        trainer_parameters["keep_checkpoints"] = self.keep_checkpoints
        if brain_name in trainer_config:
            _brain_key = brain_name
            # Remember every section visited so that aliases pointing back
            # at each other raise instead of looping forever.
            visited_keys = {_brain_key}
            while not isinstance(trainer_config[_brain_key], dict):
                _brain_key = trainer_config[_brain_key]
                if _brain_key in visited_keys:
                    raise UnityEnvironmentException(
                        "The trainer config contains a circular alias "
                        "for brain {}".format(brain_name)
                    )
                visited_keys.add(_brain_key)
            trainer_parameters.update(trainer_config[_brain_key])
        trainer_parameters_dict[brain_name] = trainer_parameters.copy()

    # Second pass: instantiate the trainer class selected by "trainer".
    for brain_name, brain_parameters in self.external_brains.items():
        trainer_parameters = trainer_parameters_dict[brain_name]
        trainer_type = trainer_parameters["trainer"]
        if trainer_type == "offline_bc":
            self.trainers[brain_name] = OfflineBCTrainer(
                brain_parameters,
                trainer_parameters,
                self.train_model,
                self.load_model,
                self.seed,
                self.run_id,
            )
        elif trainer_type == "online_bc":
            self.trainers[brain_name] = OnlineBCTrainer(
                brain_parameters,
                trainer_parameters,
                self.train_model,
                self.load_model,
                self.seed,
                self.run_id,
            )
        elif trainer_type == "ppo":
            # Without a meta curriculum there is no lesson length; 0 is
            # passed in that case.
            if self.meta_curriculum:
                lesson_length = self.meta_curriculum.brains_to_curriculums[
                    brain_name
                ].min_lesson_length
            else:
                lesson_length = 0
            self.trainers[brain_name] = PPOTrainer(
                brain_parameters,
                lesson_length,
                trainer_parameters,
                self.train_model,
                self.load_model,
                self.seed,
                self.run_id,
            )
            self.trainer_metrics[brain_name] = self.trainers[
                brain_name
            ].trainer_metrics
        else:
            raise UnityEnvironmentException(
                "The trainer config contains "
                "an unknown trainer type for "
                "brain {}".format(brain_name)
            )
def initialize_trainers(
    self,
    trainer_config: Dict[str, Any],
    external_brains: Dict[str, BrainParameters],
) -> None:
    """
    Build one trainer per external brain and register it on this object.

    For every brain in ``external_brains`` the parameters start as a copy of
    ``trainer_config["default"]``, gain per-brain ``summary_path``,
    ``model_path`` and ``keep_checkpoints`` entries, and are then overridden
    by the brain's own section of ``trainer_config`` when one exists. A
    section whose value is not a dict is treated as an alias naming another
    section; aliases may be chained.

    Created trainers are stored in ``self.trainers``. For PPO trainers the
    trainer's ``trainer_metrics`` is also stored in ``self.trainer_metrics``.

    :param trainer_config: The configurations of the trainers, keyed by
        section name. The ``"default"`` section is read for every brain.
    :param external_brains: The brains to create trainers for, keyed by
        brain name.
    :raises UnityEnvironmentException: If a brain's aliases refer back to
        each other, or if the resolved ``"trainer"`` type is not one of
        ``"offline_bc"``, ``"online_bc"`` or ``"ppo"``.
    """
    # First pass: resolve the full parameter set for every brain, so that a
    # configuration error surfaces before any trainer is constructed.
    trainer_parameters_dict = {}
    for brain_name in external_brains:
        trainer_parameters = trainer_config["default"].copy()
        trainer_parameters["summary_path"] = "{basedir}/{name}".format(
            basedir=self.summaries_dir,
            name=str(self.run_id) + "_" + brain_name)
        trainer_parameters["model_path"] = "{basedir}/{name}".format(
            basedir=self.model_path, name=brain_name)
        trainer_parameters["keep_checkpoints"] = self.keep_checkpoints
        if brain_name in trainer_config:
            _brain_key: Any = brain_name
            # Remember every section visited so that aliases pointing back
            # at each other raise instead of looping forever.
            visited_keys = {_brain_key}
            while not isinstance(trainer_config[_brain_key], dict):
                _brain_key = trainer_config[_brain_key]
                if _brain_key in visited_keys:
                    raise UnityEnvironmentException(
                        "The trainer config contains a circular alias "
                        "for brain {}".format(brain_name))
                visited_keys.add(_brain_key)
            trainer_parameters.update(trainer_config[_brain_key])
        trainer_parameters_dict[brain_name] = trainer_parameters.copy()

    # Second pass: instantiate the trainer class selected by "trainer".
    for brain_name, brain_parameters in external_brains.items():
        trainer_parameters = trainer_parameters_dict[brain_name]
        trainer_type = trainer_parameters["trainer"]
        if trainer_type == "offline_bc":
            self.trainers[brain_name] = OfflineBCTrainer(
                brain=brain_parameters,
                trainer_parameters=trainer_parameters,
                training=self.train_model,
                load=self.load_model,
                seed=self.seed,
                run_id=self.run_id,
            )
        elif trainer_type == "online_bc":
            self.trainers[brain_name] = OnlineBCTrainer(
                brain=brain_parameters,
                trainer_parameters=trainer_parameters,
                training=self.train_model,
                load=self.load_model,
                seed=self.seed,
                run_id=self.run_id,
            )
        elif trainer_type == "ppo":
            # Find lesson length based on the form of learning
            if self.meta_curriculum:
                lesson_length = self.meta_curriculum.brains_to_curriculums[
                    brain_name].min_lesson_length
            else:
                lesson_length = 1
            self.trainers[brain_name] = PPOTrainer(
                brain=brain_parameters,
                reward_buff_cap=lesson_length,
                trainer_parameters=trainer_parameters,
                training=self.train_model,
                load=self.load_model,
                seed=self.seed,
                run_id=self.run_id,
            )
            self.trainer_metrics[brain_name] = self.trainers[
                brain_name].trainer_metrics
        else:
            raise UnityEnvironmentException("The trainer config contains "
                                            "an unknown trainer type for "
                                            "brain {}".format(brain_name))