Example #1
 def __init__(
     self,
     settings: Optional[Dict[str, EnvironmentParameterSettings]] = None,
     run_seed: int = -1,
     restore: bool = False,
 ):
     """
     EnvironmentParameterManager manages all the environment parameters of a training
     session. It determines when parameters should change and gives access to the
     current sampler of each parameter.
     :param settings: A dictionary from environment parameter to
     EnvironmentParameterSettings.
     :param run_seed: When the seed is not provided for an environment parameter,
     this seed will be used instead.
     :param restore: If true, the EnvironmentParameterManager will use the
     GlobalTrainingStatus to try and reload the lesson status of each environment
     parameter.
     """
     if settings is None:
         settings = {}
     self._dict_settings = settings
     for parameter_name in self._dict_settings.keys():
         initial_lesson = GlobalTrainingStatus.get_parameter_state(
             parameter_name, StatusType.LESSON_NUM)
         if initial_lesson is None or not restore:
             GlobalTrainingStatus.set_parameter_state(
                 parameter_name, StatusType.LESSON_NUM, 0)
     self._smoothed_values: Dict[str, float] = defaultdict(float)
     for key in self._dict_settings.keys():
         self._smoothed_values[key] = 0.0
     # Update the seeds of the samplers
     self._set_sampler_seeds(run_seed)
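For orientation, here is a minimal usage sketch of the constructor above. The import path is an assumption about the ml-agents package layout; with settings left as None the constructor falls back to an empty dict, so the lesson map (see Example #10 below) comes back empty.

# Minimal usage sketch; the import path is an assumption about the
# ml-agents package layout and may differ across versions.
from mlagents.trainers.environment_parameter_manager import (
    EnvironmentParameterManager,
)

manager = EnvironmentParameterManager(settings=None, run_seed=42, restore=False)
print(manager.get_current_lesson_number())  # {} -- no parameters configured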
Example #2
def test_model_management(tmpdir):

    results_path = os.path.join(tmpdir, "results")
    brain_name = "Mock_brain"
    final_model_path = os.path.join(results_path, brain_name)
    test_checkpoint_list = [
        {
            "steps": 1,
            "file_path": os.path.join(final_model_path, f"{brain_name}-1.nn"),
            "reward": 1.312,
            "creation_time": time.time(),
            "auxillary_file_paths": [],
        },
        {
            "steps": 2,
            "file_path": os.path.join(final_model_path, f"{brain_name}-2.nn"),
            "reward": 1.912,
            "creation_time": time.time(),
            "auxillary_file_paths": [],
        },
        {
            "steps": 3,
            "file_path": os.path.join(final_model_path, f"{brain_name}-3.nn"),
            "reward": 2.312,
            "creation_time": time.time(),
            "auxillary_file_paths": [],
        },
    ]
    GlobalTrainingStatus.set_parameter_state(brain_name,
                                             StatusType.CHECKPOINTS,
                                             test_checkpoint_list)

    new_checkpoint_4 = ModelCheckpoint(
        4, os.path.join(final_model_path, f"{brain_name}-4.nn"), 2.678,
        time.time())
    ModelCheckpointManager.add_checkpoint(brain_name, new_checkpoint_4, 4)
    assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4

    new_checkpoint_5 = ModelCheckpoint(
        5, os.path.join(final_model_path, f"{brain_name}-5.nn"), 3.122,
        time.time())
    ModelCheckpointManager.add_checkpoint(brain_name, new_checkpoint_5, 4)
    assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4

    final_model_path = f"{final_model_path}.nn"
    final_model_time = time.time()
    current_step = 6
    final_model = ModelCheckpoint(current_step, final_model_path, 3.294,
                                  final_model_time)

    ModelCheckpointManager.track_final_checkpoint(brain_name, final_model)
    assert len(ModelCheckpointManager.get_checkpoints(brain_name)) == 4

    check_checkpoints = GlobalTrainingStatus.saved_state[brain_name][
        StatusType.CHECKPOINTS.value]
    assert check_checkpoints is not None

    final_model = GlobalTrainingStatus.saved_state[brain_name][
        StatusType.FINAL_CHECKPOINT.value]
    assert final_model is not None
Example #3
 def log_current_lesson(self, parameter_name: Optional[str] = None) -> None:
     """
     Logs the current lesson number and sampler value of the parameter with name
     parameter_name. If no parameter_name is provided, the values and lesson
     numbers of all parameters will be displayed.
     """
     if parameter_name is not None:
         settings = self._dict_settings[parameter_name]
         lesson_number = GlobalTrainingStatus.get_parameter_state(
             parameter_name, StatusType.LESSON_NUM
         )
         lesson_name = settings.curriculum[lesson_number].name
         lesson_value = settings.curriculum[lesson_number].value
         logger.info(
             f"Parameter '{parameter_name}' is in lesson '{lesson_name}' "
             f"and has value '{lesson_value}'."
         )
     else:
         for parameter_name, settings in self._dict_settings.items():
             lesson_number = GlobalTrainingStatus.get_parameter_state(
                 parameter_name, StatusType.LESSON_NUM
             )
             lesson_name = settings.curriculum[lesson_number].name
             lesson_value = settings.curriculum[lesson_number].value
             logger.info(
                 f"Parameter '{parameter_name}' is in lesson '{lesson_name}' "
                 f"and has value '{lesson_value}'."
             )
Example #4
 def save_model(self) -> None:
     """
     Forwarding call to the wrapped trainer's save_model.
     """
     GlobalTrainingStatus.set_parameter_state(self.brain_name,
                                              StatusType.ELO,
                                              self.current_elo)
     self.trainer.save_model()
Example #5
 def get_checkpoints(behavior_name: str) -> List[Dict[str, Any]]:
     checkpoint_list = GlobalTrainingStatus.get_parameter_state(
         behavior_name, StatusType.CHECKPOINTS)
     if not checkpoint_list:
         checkpoint_list = []
         GlobalTrainingStatus.set_parameter_state(behavior_name,
                                                  StatusType.CHECKPOINTS,
                                                  checkpoint_list)
     return checkpoint_list
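The lazy initialization above means the first read for an unseen behavior name registers (and returns) an empty list. A small sketch of that contract; hosting get_checkpoints on NNCheckpointManager is an assumption based on Examples #8 and #9, and the import paths may differ across ml-agents versions.

# Sketch of the lazy-init contract; import paths are assumptions.
from mlagents.trainers.policy.checkpoint_manager import NNCheckpointManager
from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType

checkpoints = NNCheckpointManager.get_checkpoints("BrandNewBehavior")
assert checkpoints == []  # first read stores an empty list...
assert GlobalTrainingStatus.get_parameter_state(
    "BrandNewBehavior", StatusType.CHECKPOINTS) == []  # ...now persisted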
Example #6
def test_globaltrainingstatus(tmpdir):
    path_dir = os.path.join(tmpdir, "test.json")

    GlobalTrainingStatus.set_parameter_state("Category1",
                                             StatusType.LESSON_NUM, 3)
    GlobalTrainingStatus.save_state(path_dir)

    with open(path_dir) as fp:
        test_json = json.load(fp)

    assert "Category1" in test_json
    assert StatusType.LESSON_NUM.value in test_json["Category1"]
    assert test_json["Category1"][StatusType.LESSON_NUM.value] == 3
    assert "metadata" in test_json

    GlobalTrainingStatus.load_state(path_dir)
    restored_val = GlobalTrainingStatus.get_parameter_state(
        "Category1", StatusType.LESSON_NUM)
    assert restored_val == 3

    # Test unknown categories and status types (keys)
    unknown_category = GlobalTrainingStatus.get_parameter_state(
        "Category3", StatusType.LESSON_NUM)

    class FakeStatusType(Enum):
        NOTAREALKEY = "notarealkey"

    unknown_key = GlobalTrainingStatus.get_parameter_state(
        "Category1", FakeStatusType.NOTAREALKEY)
    assert unknown_category is None
    assert unknown_key is None
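The assertions above pin down the on-disk layout: one top-level key per category plus a "metadata" block. A compact round-trip sketch mirroring the test (the exact metadata contents depend on the ml-agents version):

# Round-trip sketch mirroring the test above.
import json
import os
import tempfile
from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType

path = os.path.join(tempfile.mkdtemp(), "training_status.json")
GlobalTrainingStatus.set_parameter_state("Category1", StatusType.LESSON_NUM, 3)
GlobalTrainingStatus.save_state(path)
with open(path) as fp:
    print(json.load(fp))  # {'Category1': {'lesson_num': 3}, 'metadata': {...}}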
Example #7
    def update_lessons(
        self,
        trainer_steps: Dict[str, int],
        trainer_max_steps: Dict[str, int],
        trainer_reward_buffer: Dict[str, List[float]],
    ) -> Tuple[bool, bool]:
        """
        Given progress metrics, calculates if at least one environment parameter is
        in a new lesson and if at least one environment parameter requires the env
        to reset.
        :param trainer_steps: A dictionary from behavior_name to the number of training
        steps this behavior's trainer has performed.
        :param trainer_max_steps: A dictionary from behavior_name to the maximum
        number of training steps this behavior's trainer is allowed to perform.
        :param trainer_reward_buffer: A dictionary from behavior_name to the list of
        the most recent episode returns for this behavior's trainer.
        :returns: A tuple of two booleans: (True if any lesson has changed, True if
        the environment needs to reset).
        """
        must_reset = False
        updated = False
        for param_name, settings in self._dict_settings.items():
            lesson_num = GlobalTrainingStatus.get_parameter_state(
                param_name, StatusType.LESSON_NUM)
            next_lesson_num = lesson_num + 1
            lesson = settings.curriculum[lesson_num]
            if (lesson.completion_criteria is not None
                    and len(settings.curriculum) > next_lesson_num):
                behavior_to_consider = lesson.completion_criteria.behavior
                if behavior_to_consider in trainer_steps:
                    must_increment, new_smoothing = lesson.completion_criteria.need_increment(
                        float(trainer_steps[behavior_to_consider]) /
                        float(trainer_max_steps[behavior_to_consider]),
                        trainer_reward_buffer[behavior_to_consider],
                        self._smoothed_values[param_name],
                    )
                    self._smoothed_values[param_name] = new_smoothing
                    if must_increment:
                        GlobalTrainingStatus.set_parameter_state(
                            param_name, StatusType.LESSON_NUM, next_lesson_num)
                        new_lesson_name = settings.curriculum[
                            next_lesson_num].name
                        new_lesson_value = settings.curriculum[
                            next_lesson_num].value

                        logger.info(
                            f"Parameter '{param_name}' has been updated to {new_lesson_value}."
                            + f" Now in lesson '{new_lesson_name}'")
                        updated = True
                        if lesson.completion_criteria.require_reset:
                            must_reset = True
        return updated, must_reset
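A hedged sketch of how a caller might drive update_lessons from a training loop. The function and its parameters are illustrative, not part of the API shown above; the reset call is a stand-in for whatever the caller actually does when must_reset is True.

# Illustrative driver; the dictionaries are keyed by behavior name, as the
# docstring above describes. `manager` and `env_manager` are injected
# (hypothetical) rather than constructed here.
def advance_curriculum(manager, env_manager, steps, max_steps, rewards):
    updated, must_reset = manager.update_lessons(
        trainer_steps={"Brain": steps},
        trainer_max_steps={"Brain": max_steps},
        trainer_reward_buffer={"Brain": rewards},
    )
    if updated:
        manager.log_current_lesson()  # see Example #3
    if must_reset:
        env_manager.reset()  # stand-in for the caller's actual reset path
    return updated, must_reset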
Example #8
 def track_final_checkpoint(cls, behavior_name: str,
                            final_checkpoint: NNCheckpoint) -> None:
     """
     Ensures the number of checkpoints stored stays within the user-defined
     maximum, then stores the information about the final model (or an
     intermediate model if training is interrupted).
     :param behavior_name: Behavior name of the model.
     :param final_checkpoint: Checkpoint information for the final model.
     """
     final_model_dict = attr.asdict(final_checkpoint)
     GlobalTrainingStatus.set_parameter_state(behavior_name,
                                              StatusType.FINAL_CHECKPOINT,
                                              final_model_dict)
Example #9
 def add_checkpoint(cls, behavior_name: str, new_checkpoint: NNCheckpoint,
                    keep_checkpoints: int) -> None:
     """
     Make room for new checkpoint if needed and insert new checkpoint information.
     :param behavior_name: Behavior name for the checkpoint.
     :param new_checkpoint: The new checkpoint to be recorded.
     :param keep_checkpoints: Number of checkpoints to record (user-defined).
     """
     new_checkpoint_dict = attr.asdict(new_checkpoint)
     checkpoints = cls.get_checkpoints(behavior_name)
     checkpoints.append(new_checkpoint_dict)
     cls._cleanup_extra_checkpoints(checkpoints, keep_checkpoints)
     GlobalTrainingStatus.set_parameter_state(behavior_name,
                                              StatusType.CHECKPOINTS,
                                              checkpoints)
Example #10
 def get_current_lesson_number(self) -> Dict[str, int]:
     """
     Creates a dictionary from environment parameter to the current lesson number.
     If not using curriculum, this number is always 0 for that environment parameter.
     """
     result: Dict[str, int] = {}
     for parameter_name in self._dict_settings.keys():
         result[parameter_name] = GlobalTrainingStatus.get_parameter_state(
             parameter_name, StatusType.LESSON_NUM)
     return result
Example #11
    def advance(self, env: EnvManager) -> int:
        # Get steps
        with hierarchical_timer("env_step"):
            num_steps = env.advance()
        # Report current lesson
        if self.meta_curriculum:
            for brain_name, curr in self.meta_curriculum.brains_to_curricula.items():
                if brain_name in self.trainers:
                    self.trainers[brain_name].stats_reporter.set_stat(
                        "Environment/Lesson", curr.lesson_num)
                    GlobalTrainingStatus.set_parameter_state(
                        brain_name, StatusType.LESSON_NUM, curr.lesson_num)

        for trainer in self.trainers.values():
            if not trainer.threaded:
                with hierarchical_timer("trainer_advance"):
                    trainer.advance()

        return num_steps
Example #12
 def get_current_samplers(self) -> Dict[str, ParameterRandomizationSettings]:
     """
     Creates a dictionary from environment parameter name to its corresponding
     ParameterRandomizationSettings. If curriculum is used, the
     ParameterRandomizationSettings corresponds to the sampler of the current lesson.
     """
     samplers: Dict[str, ParameterRandomizationSettings] = {}
     for param_name, settings in self._dict_settings.items():
         lesson_num = GlobalTrainingStatus.get_parameter_state(
             param_name, StatusType.LESSON_NUM)
         lesson = settings.curriculum[lesson_num]
         samplers[param_name] = lesson.value
     return samplers
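Paired with get_current_lesson_number (Example #10), this makes per-parameter status easy to report. A small illustrative sketch, with the manager injected rather than constructed here:

# Pairs Example #10 with Example #12; `manager` is a hypothetical
# EnvironmentParameterManager instance passed in by the caller.
def report_lessons(manager):
    samplers = manager.get_current_samplers()
    for name, lesson_num in manager.get_current_lesson_number().items():
        print(f"{name}: lesson {lesson_num}, sampler {samplers[name]}")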
Example #13
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments for training.
    """

    options.checkpoint_settings.run_id = "test8"

    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (os.path.join(base_path,
                                        checkpoint_settings.initialize_from)
                           if checkpoint_settings.initialize_from else None)
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json"))
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume)
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

    engine_config = EngineConfig(
        width=engine_settings.width,
        height=engine_settings.height,
        quality_level=engine_settings.quality_level,
        time_scale=engine_settings.time_scale,
        target_frame_rate=engine_settings.target_frame_rate,
        capture_frame_rate=engine_settings.capture_frame_rate,
    )
    if env_settings.env_path is None:
        port = None
    # Begin training

    env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"
    env_factory = create_environment_factory(
        env_settings.env_path,
        engine_settings.no_graphics,
        run_seed,
        port,
        env_settings.env_args,
        os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
    )
    env_manager = SubprocessEnvManager(env_factory, engine_config,
                                       env_settings.num_envs)

    maybe_meta_curriculum = try_create_meta_curriculum(
        options.curriculum, env_manager, restore=checkpoint_settings.resume)
    sampler_manager, resampling_interval = create_sampler_manager(
        options.parameter_randomization, run_seed)
    max_steps = options.behaviors['Brain'].max_steps
    options.behaviors['Brain'].max_steps = 10

    trainer_factory = TrainerFactory(options,
                                     write_path,
                                     not checkpoint_settings.inference,
                                     checkpoint_settings.resume,
                                     run_seed,
                                     maybe_init_path,
                                     maybe_meta_curriculum,
                                     False,
                                     total_steps=0)
    trainer_factory.trainer_config[
        'Brain'].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT

    # Create controller and begin training.
    tc = TrainerController(
        trainer_factory,
        write_path,
        checkpoint_settings.run_id,
        maybe_meta_curriculum,
        not checkpoint_settings.inference,
        run_seed,
        sampler_manager,
        resampling_interval,
    )
    try:
        # Get initial weights
        tc.init_weights(env_manager)
        inital_weights = deepcopy(tc.weights)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)

    options.behaviors['Brain'].max_steps = max_steps
    step = 0
    counter = 0
    max_meta_updates = 200
    while counter < max_meta_updates:
        sample = np.random.random_sample()
        if sample > 1:  # random_sample() is in [0, 1), so this branch never triggers
            print("Performing Meta-learning on Carry Object stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mCarryObject_new/RLProject.exe"
        else:
            print("Performing Meta-learning on Find Target stage")
            env_settings.env_path = "C:/Users/Sebastian/Desktop/RLUnity/Training/mFindTarget_new/RLProject.exe"

        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, engine_config,
                                           env_settings.num_envs)

        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum,
            env_manager,
            restore=checkpoint_settings.resume)
        sampler_manager, resampling_interval = create_sampler_manager(
            options.parameter_randomization, run_seed)

        trainer_factory = TrainerFactory(options,
                                         write_path,
                                         not checkpoint_settings.inference,
                                         checkpoint_settings.resume,
                                         run_seed,
                                         maybe_init_path,
                                         maybe_meta_curriculum,
                                         False,
                                         total_steps=step)

        trainer_factory.trainer_config[
            'Brain'].hyperparameters.learning_rate_schedule = ScheduleType.CONSTANT
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.learning_rate = 0.0005 * (
                1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.beta = 0.005 * (
                1 - counter / max_meta_updates)
        trainer_factory.trainer_config[
            'Brain'].hyperparameters.epsilon = 0.2 * (
                1 - counter / max_meta_updates)
        print("Current lr: {}\nCurrent beta: {}\nCurrent epsilon: {}".format(
            trainer_factory.trainer_config['Brain'].hyperparameters.
            learning_rate,
            trainer_factory.trainer_config['Brain'].hyperparameters.beta,
            trainer_factory.trainer_config['Brain'].hyperparameters.epsilon))

        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
            sampler_manager,
            resampling_interval,
        )
        try:
            # Get initial weights
            print("Start learning at step: " + str(step) + " meta_step: " +
                  str(counter))
            print("Initial weights: " + str(inital_weights[8]))
            weights_after_train = tc.start_learning(env_manager,
                                                    inital_weights)

            print(tc.trainers['Brain'].optimizer)

            # weights_after_train = tc.weights
            # print("Trained weights: " + str(weights_after_train[8]))
            step += options.behaviors['Brain'].max_steps
            print("meta step:" + str(step))
            # print(weights_after_train)
            # equal = []
            # for i, weight in enumerate(tc.weights):
            #     equal.append(np.array_equal(inital_weights[i], weights_after_train[i]))
            # print(all(equal))
        finally:
            print(len(weights_after_train), len(inital_weights))
            for i, weight in enumerate(weights_after_train):
                inital_weights[i] = weights_after_train[i]
            env_manager.close()
            write_run_options(write_path, options)
            write_timing_tree(run_logs_dir)
            write_training_status(run_logs_dir)
        counter += 1
Example #14
def write_training_status(output_dir: str) -> None:
    GlobalTrainingStatus.save_state(
        os.path.join(output_dir, TRAINING_STATUS_FILE_NAME))
Example #15
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from is not None
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        validate_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )

        # Configure Tensorboard Writers and StatsReporter
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )

        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
Example #16
    def __init__(
        self,
        trainer,
        brain_name,
        controller,
        reward_buff_cap,
        trainer_settings,
        training,
        artifact_path,
    ):
        """
        Creates a GhostTrainer.
        :param trainer: The trainer of the policy/policies being trained with self_play
        :param brain_name: The name of the brain associated with trainer config
        :param controller: GhostController that coordinates all ghost trainers and calculates ELO
        :param reward_buff_cap: Max reward history to track in the reward buffer
        :param trainer_settings: The parameters for the trainer.
        :param training: Whether the trainer is set for training.
        :param artifact_path: Path to store artifacts from this trainer.
        """

        super().__init__(brain_name, trainer_settings, training, artifact_path,
                         reward_buff_cap)

        self.trainer = trainer
        self.controller = controller

        self._internal_trajectory_queues: Dict[
            str, AgentManagerQueue[Trajectory]] = {}
        self._internal_policy_queues: Dict[str, AgentManagerQueue[Policy]] = {}

        self._team_to_name_to_policy_queue: DefaultDict[int, Dict[
            str, AgentManagerQueue[Policy]]] = defaultdict(dict)

        self._name_to_parsed_behavior_id: Dict[str, BehaviorIdentifiers] = {}

        # assign ghost's stats collection to wrapped trainer's
        self._stats_reporter = self.trainer.stats_reporter
        # Set the logging to print ELO in the console
        self._stats_reporter.add_property(StatsPropertyType.SELF_PLAY, True)

        self_play_parameters = trainer_settings.self_play
        self.window = self_play_parameters.window
        self.play_against_latest_model_ratio = (
            self_play_parameters.play_against_latest_model_ratio)
        if (self.play_against_latest_model_ratio > 1.0
                or self.play_against_latest_model_ratio < 0.0):
            logger.warning(
                "The play_against_latest_model_ratio is not between 0 and 1.")

        self.steps_between_save = self_play_parameters.save_steps
        self.steps_between_swap = self_play_parameters.swap_steps
        self.steps_to_train_team = self_play_parameters.team_change
        if self.steps_to_train_team > self.get_max_steps:
            logger.warning(
                "The max steps of the GhostTrainer for behavior name {} is less than "
                "team change. This team will not face opposition that has been "
                "trained if the opposition is managed by a different GhostTrainer "
                "as in an asymmetric game.".format(self.brain_name))

        # Counts the number of steps of the ghost policies. Snapshot swapping
        # depends on this counter, whereas snapshot saving and team switching depend
        # on the wrapped trainer's step count. This ensures that all teams train for
        # the same number of trainer steps.
        self.ghost_step: int = 0

        # A list of dicts from brain name to a single snapshot for this trainer's policies
        self.policy_snapshots: List[Dict[str, List[float]]] = []

        # A dict from brain name to the current snapshot of this trainer's policies
        self.current_policy_snapshot: Dict[str, List[float]] = {}

        self.snapshot_counter: int = 0

        # wrapped_training_team and learning team need to be separate
        # in the situation where new agents are created or destroyed
        # after the learning team switches. These agents need to be added
        # to trainers properly.
        self._learning_team: int = None
        self.wrapped_trainer_team: int = None
        self.last_save: int = 0
        self.last_swap: int = 0
        self.last_team_change: int = 0

        self.initial_elo = GlobalTrainingStatus.get_parameter_state(
            self.brain_name, StatusType.ELO)
        if self.initial_elo is None:
            self.initial_elo = self_play_parameters.initial_elo
        self.policy_elos: List[float] = [self.initial_elo] * (
            self.window + 1)  # for learning policy
        self.current_opponent: int = 0
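The ELO lookup at the end closes the loop with Example #4: save_model persists current_elo under StatusType.ELO, and this constructor reads it back on resume. A tiny sketch of that round trip (import path is an assumption about the package layout):

# Sketch of the ELO persistence round trip implied by Examples #4 and #16.
from mlagents.trainers.training_status import GlobalTrainingStatus, StatusType

GlobalTrainingStatus.set_parameter_state("Brain", StatusType.ELO, 1250.0)
restored = GlobalTrainingStatus.get_parameter_state("Brain", StatusType.ELO)
assert restored == 1250.0  # GhostTrainer would adopt this as initial_elo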
Example #17
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param options: Parsed command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        handle_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # Configure CSV, Tensorboard Writers and StatsReporter
        # We assume reward and episode length are needed in the CSV.
        csv_writer = CSVWriter(
            write_path,
            required_fields=[
                "Environment/Cumulative Reward",
                "Environment/Episode Length",
            ],
        )
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(csv_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        maybe_meta_curriculum = try_create_meta_curriculum(
            options.curriculum, env_manager, restore=checkpoint_settings.resume
        )
        maybe_add_samplers(options.parameter_randomization, env_manager, run_seed)
        trainer_factory = TrainerFactory(
            options.behaviors,
            write_path,
            not checkpoint_settings.inference,
            checkpoint_settings.resume,
            run_seed,
            maybe_init_path,
            maybe_meta_curriculum,
            False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            maybe_meta_curriculum,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
Example #18
def run_training(run_seed: int, options: RunOptions, num_areas: int) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param num_areas: Number of training areas to instantiate
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        torch_utils.set_torch_config(options.torch_settings)
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings

        run_logs_dir = checkpoint_settings.run_logs_dir
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        validate_existing_directories(
            checkpoint_settings.write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            checkpoint_settings.maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states in case of resume
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # In case of initialization, set full init_path for all behaviors
        elif checkpoint_settings.maybe_init_path is not None:
            setup_init_path(options.behaviors, checkpoint_settings.maybe_init_path)

        # Configure Tensorboard Writers and StatsReporter
        stats_writers = register_stats_writer_plugins(options)
        for sw in stats_writers:
            StatsReporter.add_writer(sw)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            num_areas,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, options, env_settings.num_envs)
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )

        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=checkpoint_settings.write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=checkpoint_settings.maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            checkpoint_settings.write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(checkpoint_settings.write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)