def _load_config(self):
     """
     Reads the trainer configuration from the YAML file at self.trainer_config_path.
     YAML syntax errors are not handled here and propagate to the caller.
     :return: the parsed content of the configuration file.
     :raises UnityEnvironmentException: if the file cannot be opened, or if its
     content cannot be decoded as text.
     """
     try:
         with open(self.trainer_config_path) as data_file:
             # yaml.load without an explicit Loader is deprecated since PyYAML 5.1
             # and rejected by PyYAML 6; safe_load only builds plain YAML types,
             # which is all a hyperparameter file needs.
             return yaml.safe_load(data_file)
     except IOError:
         # Loading is aborted by this exception, so the message must not promise
         # a fallback to default hyperparameters.
         raise UnityEnvironmentException("Parameter file could not be found here {}."
                                         .format(self.trainer_config_path))
     except UnicodeDecodeError:
         raise UnityEnvironmentException("There was an error decoding Trainer Config from this path : {}"
                                         .format(self.trainer_config_path))
 def _initialize_trainers(self, trainer_config, sess):
     """
     Assembles the hyperparameters of every external brain, then creates one
     trainer per brain and stores it in self.trainers.
     Each brain starts from a copy of trainer_config['default']. A brain that has
     its own entry in trainer_config gets those values on top; an entry that is
     not a dict is treated as the key of another entry and followed until a dict
     is reached.
     :param trainer_config: mapping holding a 'default' entry and optional per-brain entries.
     :param sess: handed to every trainer constructor unchanged.
     :raises UnityEnvironmentException: if a brain's 'trainer' value is neither
     "imitation" nor "ppo".
     """
     brain_names = self.env.external_brain_names
     self.trainers = {}
     settings_by_brain = {}

     # First pass: work out the full parameter set of every brain.
     for name in brain_names:
         settings = trainer_config['default'].copy()
         if len(brain_names) > 1:
             # Several brains: each one gets its own scope and summary folder.
             scope = re.sub('[^0-9a-zA-Z]+', '-', name)
             run_label = str(self.run_id) + '_' + scope
         else:
             scope = ''
             run_label = str(self.run_id)
         settings['graph_scope'] = scope
         settings['summary_path'] = '{basedir}/{name}'.format(
             basedir=self.summaries_dir, name=run_label)
         if name in trainer_config:
             key = name
             while not isinstance(trainer_config[key], dict):
                 key = trainer_config[key]
             settings.update(trainer_config[key])
         settings_by_brain[name] = settings.copy()

     # Second pass: instantiate the trainer each brain asked for.
     for name in brain_names:
         settings = settings_by_brain[name]
         kind = settings['trainer']
         if kind == "imitation":
             trainer_class = BehavioralCloningTrainer
         elif kind == "ppo":
             trainer_class = PPOTrainer
         else:
             raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
                                             .format(name))
         self.trainers[name] = trainer_class(sess, self.env, name, settings,
                                             self.train_model, self.seed)
 def _create_model_path(model_path):
     """
     Makes sure the folder meant to hold the generated model exists.
     Nothing is done when something already exists at model_path; otherwise the
     folder is created together with any missing parent folders.
     :param model_path: path of the folder.
     :raises UnityEnvironmentException: if checking for or creating the folder fails.
     """
     try:
         if os.path.exists(model_path):
             return
         os.makedirs(model_path)
     except Exception:
         message = ("The folder {} containing the generated model could not be accessed."
                    " Please make sure the permissions are set correctly.")
         raise UnityEnvironmentException(message.format(model_path))
Exemple #4
0
def create_agent_model(env,
                       lr=1e-4,
                       h_size=128,
                       epsilon=0.2,
                       beta=1e-3,
                       max_step=5e6):
    """
    Takes a Unity environment and model-specific hyperparameters and returns the
    appropriate PPO agent model for the environment.
    Only the first brain listed in env.brain_names is considered.
    :param env: a Unity environment.
    :param lr: Learning rate.
    :param h_size: Size of hidden layers.
    :param epsilon: Value for policy-divergence threshold.
    :param beta: Strength of entropy regularization.
    :param max_step: Handed to the model constructor unchanged (presumably the
    total number of training steps -- confirm against the model classes).
    :return: a sub-class of PPOAgent tailored to the environment, or None when
    the brain's action space type is neither "continuous" nor "discrete".
    :raises UnityEnvironmentException: if the brain combines a continuous action
    space with camera observations.
    """
    brain = env.brains[env.brain_names[0]]

    if brain.action_space_type == "continuous":
        if brain.number_observations == 0:
            return ContinuousControlModel(
                lr, brain.state_space_size, brain.action_space_size, h_size,
                epsilon, beta, max_step)
        raise UnityEnvironmentException(
            "There is currently no PPO model which supports both a continuous "
            "action space and camera observations.")

    if brain.action_space_type == "discrete":
        if brain.number_observations == 0:
            return DiscreteControlModel(
                lr, brain.state_space_size, brain.action_space_size, h_size,
                epsilon, beta, max_step)
        if brain.state_space_size > 0:
            print(
                "This brain contains agents with both observations and states. There is currently no PPO model "
                "which supports this. Defaulting to Vision-based PPO model."
            )
        # Only the first camera is used. The width must come from the 'width'
        # key: reading 'height' twice breaks non-square resolutions.
        resolution = brain.camera_resolutions[0]
        h, w = resolution['height'], resolution['width']
        return VisualDiscreteControlModel(
            lr, h, w, brain.action_space_size, h_size, epsilon, beta, max_step)

    # Unrecognised action space type: no model is built.
    return None
    def _initialize_trainers(self, trainer_config, sess):
        """
        Assembles the hyperparameters of every external brain, creates one trainer
        per brain, and assigns each brain a slice of the trainable variables.
        Each brain starts from a copy of trainer_config['default']. A brain that has
        its own entry in trainer_config gets those values on top; an entry that is
        not a dict is treated as the key of another entry and followed until a dict
        is reached. Results are stored in self.trainer_parameters_dict,
        self.trainers and self.brain_vars.
        For "dqn" and "madqn" trainers, update_target_graph is called with the
        brain's variable slice. The slice of a non-frozen "madqn" brain is kept in
        self.free_brain_vars and passed to update_frozen_brain_graph of every
        frozen "madqn" brain.
        :param trainer_config: mapping holding a 'default' entry and optional per-brain entries.
        :param sess: handed to every trainer constructor unchanged.
        :raises UnityEnvironmentException: if a brain's 'trainer' value is not one
        of the supported trainer types.
        """
        brain_names = self.env.external_brain_names
        self.trainer_parameters_dict = {}
        self.trainers = {}
        for brain_name in brain_names:
            trainer_parameters = trainer_config['default'].copy()
            if len(brain_names) > 1:
                # Several brains: each one gets its own scope and summary folder.
                graph_scope = re.sub('[^0-9a-zA-Z]+', '-', brain_name)
                summary_name = str(self.run_id) + '_' + graph_scope
            else:
                graph_scope = ''
                summary_name = str(self.run_id)
            trainer_parameters['graph_scope'] = graph_scope
            trainer_parameters['summary_path'] = '{basedir}/{name}'.format(
                basedir=self.summaries_dir,
                name=summary_name)
            if brain_name in trainer_config:
                _brain_key = brain_name
                while not isinstance(trainer_config[_brain_key], dict):
                    _brain_key = trainer_config[_brain_key]
                trainer_parameters.update(trainer_config[_brain_key])
            self.trainer_parameters_dict[brain_name] = trainer_parameters.copy()

        for brain_name in brain_names:
            trainer_parameters = self.trainer_parameters_dict[brain_name]
            trainer_type = trainer_parameters['trainer']
            if trainer_type == "imitation":
                trainer_class = BehavioralCloningTrainer
            elif trainer_type == "ppo":
                trainer_class = PPOTrainer
            elif trainer_type == "dqn":
                trainer_class = DQNTrainer
            elif trainer_type == "madqn":
                trainer_class = MADQNTrainer
            elif trainer_type == "mappo":
                trainer_class = MAPPOTrainer
            elif trainer_type == "coma":
                trainer_class = COMATrainer
            else:
                raise UnityEnvironmentException("The trainer config contains an unknown trainer type for brain {}"
                                                .format(brain_name))
            self.trainers[brain_name] = trainer_class(sess, self.env, brain_name, trainer_parameters,
                                                      self.train_model, self.seed)

        # Split the trainable variables into one contiguous slice per brain.
        # NOTE(review): this assumes every brain registered the same number of
        # trainable variables, in brain order -- confirm against the trainers.
        all_vars = tf.trainable_variables()
        self.brain_vars = {}
        total_vars = len(all_vars)
        num_brains = len(brain_names)
        vars_per_brain = total_vars // num_brains if num_brains else 0
        for index, brain_name in enumerate(brain_names):
            start = index * vars_per_brain
            # The last brain's slice runs to the end of the list so that a
            # remainder of the integer division is not dropped.
            end = total_vars if index == num_brains - 1 else start + vars_per_brain
            self.brain_vars[brain_name] = all_vars[start:end]
            trainer_type = self.trainer_parameters_dict[brain_name]['trainer']
            if trainer_type in ("dqn", "madqn"):
                self.trainers[brain_name].update_target_graph(self.brain_vars[brain_name])
                if trainer_type == "madqn" and not self.trainers[brain_name].parameters['frozen']:
                    self.free_brain_vars = self.brain_vars[brain_name]

        # NOTE(review): self.free_brain_vars is only assigned above when a
        # non-frozen "madqn" brain exists; a configuration with frozen brains
        # only would fail on the attribute lookup below.
        for brain_name in brain_names:
            if self.trainer_parameters_dict[brain_name]['trainer'] == "madqn":
                if self.trainers[brain_name].parameters['frozen']:
                    self.trainers[brain_name].update_frozen_brain_graph(self.brain_vars[brain_name], self.free_brain_vars)