def _load_config(self):
    """Load the trainer hyperparameter configuration from YAML.

    :return: dict of trainer hyperparameters parsed from
        ``self.trainer_config_path``.
    :raises UnityEnvironmentException: if the file cannot be found or
        cannot be decoded as text.
    """
    try:
        with open(self.trainer_config_path) as data_file:
            # safe_load parses plain data only; yaml.load without an explicit
            # Loader can construct arbitrary Python objects and is deprecated.
            trainer_config = yaml.safe_load(data_file)
            return trainer_config
    except IOError:
        raise UnityEnvironmentException("""Parameter file could not be found here {}.
                                        Will use default Hyper parameters"""
                                        .format(self.trainer_config_path))
    except UnicodeDecodeError:
        raise UnityEnvironmentException("There was an error decoding Trainer Config from this path : {}"
                                        .format(self.trainer_config_path))
def _initialize_trainers(self, trainer_config, sess):
    """Build per-brain trainer parameter dicts and instantiate a trainer
    for each external brain.

    :param trainer_config: dict loaded from the trainer config YAML; the
        'default' section seeds every brain, optionally overridden by a
        section named after the brain (possibly via string aliases).
    :param sess: TensorFlow session handed to each trainer.
    :raises UnityEnvironmentException: if a brain's 'trainer' value is
        not a known trainer type.
    """
    params_by_brain = {}
    self.trainers = {}
    brain_names = self.env.external_brain_names
    multi_brain = len(brain_names) > 1
    for name in brain_names:
        params = trainer_config['default'].copy()
        if multi_brain:
            # Sanitize the brain name so it is usable as a graph scope.
            scope = re.sub('[^0-9a-zA-Z]+', '-', name)
            params['graph_scope'] = scope
            params['summary_path'] = '{basedir}/{name}'.format(
                basedir=self.summaries_dir,
                name=str(self.run_id) + '_' + scope)
        else:
            params['graph_scope'] = ''
            params['summary_path'] = '{basedir}/{name}'.format(
                basedir=self.summaries_dir,
                name=str(self.run_id))
        if name in trainer_config:
            key = name
            # A section may be a string alias pointing at another section;
            # chase aliases until an actual dict of overrides is reached.
            while not isinstance(trainer_config[key], dict):
                key = trainer_config[key]
            params.update(trainer_config[key])
        params_by_brain[name] = params.copy()
    for name in brain_names:
        kind = params_by_brain[name]['trainer']
        if kind == "imitation":
            self.trainers[name] = BehavioralCloningTrainer(
                sess, self.env, name, params_by_brain[name],
                self.train_model, self.seed)
        elif kind == "ppo":
            self.trainers[name] = PPOTrainer(
                sess, self.env, name, params_by_brain[name],
                self.train_model, self.seed)
        else:
            raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
                                            .format(name))
def _create_model_path(model_path): try: if not os.path.exists(model_path): os.makedirs(model_path) except Exception: raise UnityEnvironmentException("The folder {} containing the generated model could not be accessed." " Please make sure the permissions are set correctly." .format(model_path))
def create_agent_model(env, lr=1e-4, h_size=128, epsilon=0.2, beta=1e-3, max_step=5e6):
    """
    Takes a Unity environment and model-specific hyperparameters and returns the
    appropriate PPO agent model for the environment.
    :param env: a Unity environment.
    :param lr: Learning rate.
    :param h_size: Size of hidden layers.
    :param epsilon: Value for policy-divergence threshold.
    :param beta: Strength of entropy regularization.
    :param max_step: Total number of training steps (used for schedules).
    :return: a sub-class of PPOAgent tailored to the environment.
    :raises UnityEnvironmentException: for a continuous action space combined
        with camera observations (unsupported combination).
    """
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    if brain.action_space_type == "continuous":
        if brain.number_observations == 0:
            return ContinuousControlModel(
                lr, brain.state_space_size, brain.action_space_size,
                h_size, epsilon, beta, max_step)
        raise UnityEnvironmentException(
            "There is currently no PPO model which supports both a continuous "
            "action space and camera observations.")
    if brain.action_space_type == "discrete":
        if brain.number_observations == 0:
            return DiscreteControlModel(
                lr, brain.state_space_size, brain.action_space_size,
                h_size, epsilon, beta, max_step)
        if brain.state_space_size > 0:
            print(
                "This brain contains agents with both observations and states. There is currently no PPO model"
                "which supports this. Defaulting to Vision-based PPO model."
            )
        # BUG FIX: width was previously read from the 'height' key, so the
        # model was built with a square (and possibly wrong) resolution.
        h = brain.camera_resolutions[0]['height']
        w = brain.camera_resolutions[0]['width']
        return VisualDiscreteControlModel(
            lr, h, w, brain.action_space_size, h_size, epsilon, beta, max_step)
def _initialize_trainers(self, trainer_config, sess):
    """Build per-brain trainer parameters, instantiate the configured
    trainer for every external brain, and wire up the per-brain
    trainable-variable slices (target networks, frozen MADQN brains).

    :param trainer_config: dict from the trainer config YAML; 'default'
        seeds every brain, optionally overridden by a brain-named section
        (string values are followed as aliases to other sections).
    :param sess: TensorFlow session handed to each trainer.
    :raises UnityEnvironmentException: if a brain's 'trainer' value is
        not a known trainer type.
    """
    self.trainer_parameters_dict = {}
    self.trainers = {}
    for brain_name in self.env.external_brain_names:
        trainer_parameters = trainer_config['default'].copy()
        if len(self.env.external_brain_names) > 1:
            # Sanitize the brain name into a usable graph scope.
            graph_scope = re.sub('[^0-9a-zA-Z]+', '-', brain_name)
            trainer_parameters['graph_scope'] = graph_scope
            trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
                basedir=self.summaries_dir,
                name=str(self.run_id) + '_' + graph_scope)
        else:
            trainer_parameters['graph_scope'] = ''
            trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
                basedir=self.summaries_dir,
                name=str(self.run_id))
        if brain_name in trainer_config:
            _brain_key = brain_name
            # A section may be a string alias to another section; chase
            # aliases until a dict of overrides is found.
            while not isinstance(trainer_config[_brain_key], dict):
                _brain_key = trainer_config[_brain_key]
            for k in trainer_config[_brain_key]:
                trainer_parameters[k] = trainer_config[_brain_key][k]
        self.trainer_parameters_dict[brain_name] = trainer_parameters.copy()
    # All trainer constructors share the same signature, so a dispatch
    # table replaces the original six-way if/elif chain.
    trainer_classes = {
        "imitation": BehavioralCloningTrainer,
        "ppo": PPOTrainer,
        "dqn": DQNTrainer,
        "madqn": MADQNTrainer,
        "mappo": MAPPOTrainer,
        "coma": COMATrainer,
    }
    for brain_name in self.env.external_brain_names:
        trainer_type = self.trainer_parameters_dict[brain_name]['trainer']
        if trainer_type not in trainer_classes:
            raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
                                            .format(brain_name))
        self.trainers[brain_name] = trainer_classes[trainer_type](
            sess, self.env, brain_name,
            self.trainer_parameters_dict[brain_name],
            self.train_model, self.seed)
    # Partition the graph's trainable variables evenly across brains.
    # NOTE(review): this assumes every brain contributes the same number
    # of variables and that tf returns them in per-brain creation order.
    all_vars = tf.trainable_variables()
    self.brain_vars = {}
    total_vars = len(all_vars)
    # BUG FIX: the original advanced the slice end with
    # idx2 = idx2 * total_vars, which gives the second brain everything
    # that remains and any later brain an empty slice. Advance both
    # indices by the fixed per-brain chunk instead.
    vars_per_brain = total_vars // len(self.env.external_brain_names)
    idx1 = 0
    for brain_name in self.env.external_brain_names:
        idx2 = idx1 + vars_per_brain
        self.brain_vars[brain_name] = all_vars[idx1:idx2]
        idx1 = idx2
        if self.trainer_parameters_dict[brain_name]['trainer'] in ("dqn", "madqn"):
            self.trainers[brain_name].update_target_graph(self.brain_vars[brain_name])
        if self.trainer_parameters_dict[brain_name]['trainer'] == "madqn":
            if not self.trainers[brain_name].parameters['frozen']:
                # Remember the unfrozen MADQN brain's variables so frozen
                # brains can copy from them below.
                self.free_brain_vars = self.brain_vars[brain_name]
    for brain_name in self.env.external_brain_names:
        if self.trainer_parameters_dict[brain_name]['trainer'] == "madqn":
            if self.trainers[brain_name].parameters['frozen']:
                # NOTE(review): assumes at least one unfrozen madqn brain
                # exists, otherwise self.free_brain_vars is unset — same
                # implicit assumption as the original code.
                self.trainers[brain_name].update_frozen_brain_graph(
                    self.brain_vars[brain_name], self.free_brain_vars)