Code example #1
import json
import os
from enum import Enum

# The import path below assumes the ML-Agents package layout for the training
# status helpers exercised by this test.
from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType


def test_globaltrainingstatus(tmpdir):
    # tmpdir is pytest's built-in temporary-directory fixture.
    path_dir = os.path.join(tmpdir, "test.json")

    GlobalTrainingStatus.set_parameter_state("Category1",
                                             StatusType.LESSON_NUM, 3)
    GlobalTrainingStatus.save_state(path_dir)

    with open(path_dir) as fp:
        test_json = json.load(fp)

    assert "Category1" in test_json
    assert StatusType.LESSON_NUM.value in test_json["Category1"]
    assert test_json["Category1"][StatusType.LESSON_NUM.value] == 3
    assert "metadata" in test_json

    GlobalTrainingStatus.load_state(path_dir)
    restored_val = GlobalTrainingStatus.get_parameter_state(
        "Category1", StatusType.LESSON_NUM)
    assert restored_val == 3

    # Test unknown categories and status types (keys)
    unknown_category = GlobalTrainingStatus.get_parameter_state(
        "Category3", StatusType.LESSON_NUM)

    class FakeStatusType(Enum):
        NOTAREALKEY = "notarealkey"

    unknown_key = GlobalTrainingStatus.get_parameter_state(
        "Category1", FakeStatusType.NOTAREALKEY)
    assert unknown_category is None
    assert unknown_key is None
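
For reference, the assertions above imply a saved-file layout roughly like the following sketch. This is an illustrative assumption: it presumes StatusType.LESSON_NUM.value == "lesson_num" and shows only a placeholder metadata field.

# Approximate shape of test.json after save_state (illustrative, not exhaustive).
expected_layout = {
    "Category1": {"lesson_num": 3},
    "metadata": {"mlagents_version": "<version string>"},
}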
Code example #2
 def log_current_lesson(self, parameter_name: Optional[str] = None) -> None:
     """
     Logs the current lesson number and sampler value of the parameter with name
     parameter_name. If no parameter_name is provided, the values and lesson
     numbers of all parameters will be displayed.
     """
     if parameter_name is not None:
         settings = self._dict_settings[parameter_name]
         lesson_number = GlobalTrainingStatus.get_parameter_state(
             parameter_name, StatusType.LESSON_NUM
         )
         lesson_name = settings.curriculum[lesson_number].name
         lesson_value = settings.curriculum[lesson_number].value
         logger.info(
             f"Parameter '{parameter_name}' is in lesson '{lesson_name}' "
             f"and has value '{lesson_value}'."
         )
     else:
         for parameter_name, settings in self._dict_settings.items():
             lesson_number = GlobalTrainingStatus.get_parameter_state(
                 parameter_name, StatusType.LESSON_NUM
             )
             lesson_name = settings.curriculum[lesson_number].name
             lesson_value = settings.curriculum[lesson_number].value
             logger.info(
                 f"Parameter '{parameter_name}' is in lesson '{lesson_name}' "
                 f"and has value '{lesson_value}'."
             )
Code example #3
 def __init__(
     self,
     settings: Optional[Dict[str, EnvironmentParameterSettings]] = None,
     run_seed: int = -1,
     restore: bool = False,
 ):
     """
     EnvironmentParameterManager manages all the environment parameters of a training
     session. It determines when parameters should change and gives access to the
     current sampler of each parameter.
     :param settings: A dictionary from environment parameter to
     EnvironmentParameterSettings.
     :param run_seed: When the seed is not provided for an environment parameter,
     this seed will be used instead.
     :param restore: If true, the EnvironmentParameterManager will use the
     GlobalTrainingStatus to try and reload the lesson status of each environment
     parameter.
     """
     if settings is None:
         settings = {}
     self._dict_settings = settings
     for parameter_name in self._dict_settings.keys():
         initial_lesson = GlobalTrainingStatus.get_parameter_state(
             parameter_name, StatusType.LESSON_NUM)
         if initial_lesson is None or not restore:
             GlobalTrainingStatus.set_parameter_state(
                 parameter_name, StatusType.LESSON_NUM, 0)
     self._smoothed_values: Dict[str, float] = defaultdict(float)
     for key in self._dict_settings.keys():
         self._smoothed_values[key] = 0.0
     # Update the seeds of the samplers
     self._set_sampler_seeds(run_seed)
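
A minimal usage sketch for the constructor above, assuming a settings dictionary built elsewhere from the trainer configuration; the variable names and seed are hypothetical. With restore=True, lesson numbers previously stored in GlobalTrainingStatus are kept; otherwise every parameter starts at lesson 0.

# env_param_settings: Dict[str, EnvironmentParameterSettings] (hypothetical, built from config)
manager = EnvironmentParameterManager(
    settings=env_param_settings,
    run_seed=1234,
    restore=True,
)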
Code example #4
 def get_checkpoints(behavior_name: str) -> List[Dict[str, Any]]:
     """
     Returns the list of checkpoint dictionaries stored for behavior_name in the
     GlobalTrainingStatus, registering a new empty list there if none exists yet.
     """
     checkpoint_list = GlobalTrainingStatus.get_parameter_state(
         behavior_name, StatusType.CHECKPOINTS)
     if not checkpoint_list:
         checkpoint_list = []
         GlobalTrainingStatus.set_parameter_state(behavior_name,
                                                  StatusType.CHECKPOINTS,
                                                  checkpoint_list)
     return checkpoint_list
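
Assuming get_parameter_state returns the stored list object rather than a copy, callers can append to the returned list and the in-memory status is updated in place; writing it to disk still requires save_state. A sketch with hypothetical names and fields:

checkpoints = get_checkpoints("MyBehavior")  # hypothetical behavior name
checkpoints.append({"steps": 10000, "file_path": "MyBehavior-10000.onnx"})  # illustrative fields
GlobalTrainingStatus.save_state("training_status.json")  # illustrative path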
Code example #5
 def get_current_lesson_number(self) -> Dict[str, int]:
     """
     Creates a dictionary from environment parameter to the current lesson number.
     If not using curriculum, this number is always 0 for that environment parameter.
     """
     result: Dict[str, int] = {}
     for parameter_name in self._dict_settings.keys():
         result[parameter_name] = GlobalTrainingStatus.get_parameter_state(
             parameter_name, StatusType.LESSON_NUM)
     return result
Code example #6
    def update_lessons(
        self,
        trainer_steps: Dict[str, int],
        trainer_max_steps: Dict[str, int],
        trainer_reward_buffer: Dict[str, List[float]],
    ) -> Tuple[bool, bool]:
        """
        Given progress metrics, calculates if at least one environment parameter is
        in a new lesson and if at least one environment parameter requires the env
        to reset.
        :param trainer_steps: A dictionary from behavior_name to the number of training
        steps this behavior's trainer has performed.
        :param trainer_max_steps: A dictionary from behavior_name to the maximum number
        of training steps this behavior's trainer has performed.
        :param trainer_reward_buffer: A dictionary from behavior_name to the list of
        the most recent episode returns for this behavior's trainer.
        :returns: A tuple of two booleans: (True if any lesson has changed, True if
        environment needs to reset)
        """
        must_reset = False
        updated = False
        for param_name, settings in self._dict_settings.items():
            lesson_num = GlobalTrainingStatus.get_parameter_state(
                param_name, StatusType.LESSON_NUM)
            next_lesson_num = lesson_num + 1
            lesson = settings.curriculum[lesson_num]
            if (lesson.completion_criteria is not None
                    and len(settings.curriculum) > next_lesson_num):
                behavior_to_consider = lesson.completion_criteria.behavior
                if behavior_to_consider in trainer_steps:
                    must_increment, new_smoothing = lesson.completion_criteria.need_increment(
                        float(trainer_steps[behavior_to_consider]) /
                        float(trainer_max_steps[behavior_to_consider]),
                        trainer_reward_buffer[behavior_to_consider],
                        self._smoothed_values[param_name],
                    )
                    self._smoothed_values[param_name] = new_smoothing
                    if must_increment:
                        GlobalTrainingStatus.set_parameter_state(
                            param_name, StatusType.LESSON_NUM, next_lesson_num)
                        new_lesson_name = settings.curriculum[
                            next_lesson_num].name
                        new_lesson_value = settings.curriculum[
                            next_lesson_num].value

                        logger.info(
                            f"Parameter '{param_name}' has been updated to {new_lesson_value}."
                            + f" Now in lesson '{new_lesson_name}'")
                        updated = True
                        if lesson.completion_criteria.require_reset:
                            must_reset = True
        return updated, must_reset
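
A hypothetical call-site sketch for update_lessons: per-behavior progress is passed in, and the two returned flags drive lesson logging and environment resets. The behavior name and numbers below are illustrative; log_current_lesson is the helper shown in code example #2.

updated, must_reset = manager.update_lessons(
    trainer_steps={"MyBehavior": 50000},
    trainer_max_steps={"MyBehavior": 500000},
    trainer_reward_buffer={"MyBehavior": [0.8, 0.9, 1.0]},
)
if updated:
    manager.log_current_lesson()
if must_reset:
    pass  # the caller would trigger an environment reset here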
Code example #7
 def get_current_samplers(
         self) -> Dict[str, ParameterRandomizationSettings]:
     """
     Creates a dictionary from environment parameter name to their corresponding
     ParameterRandomizationSettings. If curriculum is used, the
     ParameterRandomizationSettings corresponds to the sampler of the current lesson.
     """
     samplers: Dict[str, ParameterRandomizationSettings] = {}
     for param_name, settings in self._dict_settings.items():
         lesson_num = GlobalTrainingStatus.get_parameter_state(
             param_name, StatusType.LESSON_NUM)
         lesson = settings.curriculum[lesson_num]
         samplers[param_name] = lesson.value
     return samplers
Code example #8
    def __init__(
        self,
        trainer,
        brain_name,
        controller,
        reward_buff_cap,
        trainer_settings,
        training,
        artifact_path,
    ):
        """
        Creates a GhostTrainer.
        :param trainer: The trainer of the policy/policies being trained with self_play
        :param brain_name: The name of the brain associated with trainer config
        :param controller: GhostController that coordinates all ghost trainers and calculates ELO
        :param reward_buff_cap: Max reward history to track in the reward buffer
        :param trainer_settings: The parameters for the trainer.
        :param training: Whether the trainer is set for training.
        :param artifact_path: Path to store artifacts from this trainer.
        """

        super().__init__(brain_name, trainer_settings, training, artifact_path,
                         reward_buff_cap)

        self.trainer = trainer
        self.controller = controller

        self._internal_trajectory_queues: Dict[
            str, AgentManagerQueue[Trajectory]] = {}
        self._internal_policy_queues: Dict[str, AgentManagerQueue[Policy]] = {}

        self._team_to_name_to_policy_queue: DefaultDict[int, Dict[
            str, AgentManagerQueue[Policy]]] = defaultdict(dict)

        self._name_to_parsed_behavior_id: Dict[str, BehaviorIdentifiers] = {}

        # assign ghost's stats collection to wrapped trainer's
        self._stats_reporter = self.trainer.stats_reporter
        # Set the logging to print ELO in the console
        self._stats_reporter.add_property(StatsPropertyType.SELF_PLAY, True)

        self_play_parameters = trainer_settings.self_play
        self.window = self_play_parameters.window
        self.play_against_latest_model_ratio = (
            self_play_parameters.play_against_latest_model_ratio)
        if (self.play_against_latest_model_ratio > 1.0
                or self.play_against_latest_model_ratio < 0.0):
            logger.warning(
                "The play_against_latest_model_ratio is not between 0 and 1.")

        self.steps_between_save = self_play_parameters.save_steps
        self.steps_between_swap = self_play_parameters.swap_steps
        self.steps_to_train_team = self_play_parameters.team_change
        if self.steps_to_train_team > self.get_max_steps:
            logger.warning(
                "The max steps of the GhostTrainer for behavior name {} is less than team change. This team will not face \
                opposition that has been trained if the opposition is managed by a different GhostTrainer as in an \
                asymmetric game.".format(self.brain_name))

        # Counts the number of steps of the ghost policies. Snapshot swapping
        # depends on this counter, whereas snapshot saving and team switching depend
        # on the wrapped trainer's step count. This ensures that all teams train for
        # the same number of trainer steps.
        self.ghost_step: int = 0

        # A list of dicts from brain name to a single snapshot for this trainer's policies
        self.policy_snapshots: List[Dict[str, List[float]]] = []

        # A dict from brain name to the current snapshot of this trainer's policies
        self.current_policy_snapshot: Dict[str, List[float]] = {}

        self.snapshot_counter: int = 0

        # wrapped_trainer_team and learning team need to be separate
        # in the situation where new agents are created or destroyed
        # after the learning team switches. These agents need to be added
        # to trainers properly.
        self._learning_team: int = None
        self.wrapped_trainer_team: int = None
        self.last_save: int = 0
        self.last_swap: int = 0
        self.last_team_change: int = 0

        self.initial_elo = GlobalTrainingStatus.get_parameter_state(
            self.brain_name, StatusType.ELO)
        if self.initial_elo is None:
            self.initial_elo = self_play_parameters.initial_elo
        self.policy_elos: List[float] = [self.initial_elo] * (
            self.window + 1)  # for learning policy
        self.current_opponent: int = 0