Example #1
def test_load_save(tmp_path):
    path1 = os.path.join(tmp_path, "runid1")
    path2 = os.path.join(tmp_path, "runid2")
    trainer_params = TrainerSettings()
    policy = create_policy_mock(trainer_params, model_path=path1)
    policy.initialize_or_load()
    policy._set_step(2000)

    mock_brain_name = "MockBrain"
    checkpoint_path = f"{policy.model_path}/{mock_brain_name}-2000"
    serialization_settings = SerializationSettings(policy.model_path, mock_brain_name)
    policy.checkpoint(checkpoint_path, serialization_settings)

    assert len(os.listdir(tmp_path)) > 0

    # Try load from this path
    policy2 = create_policy_mock(trainer_params, model_path=path1, load=True, seed=1)
    policy2.initialize_or_load()
    _compare_two_policies(policy, policy2)
    assert policy2.get_current_step() == 2000

    # Try initialize from path 1
    trainer_params.output_path = path2
    trainer_params.init_path = path1
    policy3 = create_policy_mock(trainer_params, model_path=path1, load=False, seed=2)
    policy3.initialize_or_load()

    _compare_two_policies(policy2, policy3)
    # Assert that the steps are 0.
    assert policy3.get_current_step() == 0
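Examples #1 and #3 rely on two helpers that are not shown here, create_policy_mock and _compare_two_policies. Below is a minimal sketch of the comparison helper; the mock_brain.create_steps_from_behavior_spec helper and the shape of the evaluate output are assumptions based on how the tests use the policies, not code taken from these snippets:

import numpy as np
from mlagents.trainers.tests import mock_brain as mb

def _compare_two_policies(policy1, policy2):
    """
    Run both policies on the same observations; the outputs can only
    match if the two policies carry identical weights.
    """
    decision_step, _ = mb.create_steps_from_behavior_spec(
        policy1.behavior_spec, num_agents=1
    )
    run_out1 = policy1.evaluate(decision_step, list(decision_step.agent_id))
    run_out2 = policy2.evaluate(decision_step, list(decision_step.agent_id))
    np.testing.assert_array_equal(run_out1["log_probs"], run_out2["log_probs"])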
Example #2
def test_load_save_policy(tmp_path):
    path1 = os.path.join(tmp_path, "runid1")
    path2 = os.path.join(tmp_path, "runid2")
    trainer_params = TrainerSettings()
    policy = create_policy_mock(trainer_params)
    model_saver = TorchModelSaver(trainer_params, path1)
    model_saver.register(policy)
    model_saver.initialize_or_load(policy)
    policy.set_step(2000)

    mock_brain_name = "MockBrain"
    model_saver.save_checkpoint(mock_brain_name, 2000)
    assert len(os.listdir(tmp_path)) > 0

    # Try load from this path
    model_saver2 = TorchModelSaver(trainer_params, path1, load=True)
    policy2 = create_policy_mock(trainer_params)
    model_saver2.register(policy2)
    model_saver2.initialize_or_load(policy2)
    _compare_two_policies(policy, policy2)
    assert policy2.get_current_step() == 2000

    # Try initialize from path 1
    trainer_params.init_path = path1
    model_saver3 = TorchModelSaver(trainer_params, path2)
    policy3 = create_policy_mock(trainer_params)
    model_saver3.register(policy3)
    model_saver3.initialize_or_load(policy3)
    _compare_two_policies(policy2, policy3)
    # Assert that the steps are 0.
    assert policy3.get_current_step() == 0
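Note the structural difference from Example #1: here the policy knows nothing about paths or serialization. All persistence goes through the TorchModelSaver, which is constructed with the output path (plus an optional load flag), has the policy registered on it, and then owns both initialize_or_load and save_checkpoint. The test flow is otherwise identical, down to the step counts it asserts.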
Example #3
def test_load_save(tmp_path):
    path1 = os.path.join(tmp_path, "runid1")
    path2 = os.path.join(tmp_path, "runid2")
    trainer_params = TrainerSettings()
    policy = create_policy_mock(trainer_params, model_path=path1)
    policy.initialize_or_load()
    policy._set_step(2000)
    policy.save_model(2000)

    assert len(os.listdir(tmp_path)) > 0

    # Try load from this path
    policy2 = create_policy_mock(trainer_params, model_path=path1, load=True, seed=1)
    policy2.initialize_or_load()
    _compare_two_policies(policy, policy2)
    assert policy2.get_current_step() == 2000

    # Try initialize from path 1
    trainer_params.output_path = path2
    trainer_params.init_path = path1
    policy3 = create_policy_mock(trainer_params, model_path=path1, load=False, seed=2)
    policy3.initialize_or_load()

    _compare_two_policies(policy2, policy3)
    # Assert that the steps are 0.
    assert policy3.get_current_step() == 0
Example #4
    def _initialize_trainer(
        trainer_settings: TrainerSettings,
        brain_name: str,
        output_path: str,
        train_model: bool,
        load_model: bool,
        ghost_controller: GhostController,
        seed: int,
        param_manager: EnvironmentParameterManager,
        init_path: str = None,
        multi_gpu: bool = False,
    ) -> Trainer:
        """
        Initializes a trainer given a provided trainer configuration and brain parameters, as well as
        some general training session options.

        :param trainer_settings: Original trainer configuration loaded from YAML
        :param brain_name: Name of the brain to be associated with trainer
        :param output_path: Path to save the model and summary statistics
        :param train_model: Whether to train the model (vs. run inference)
        :param load_model: Whether to load the model or randomly initialize
        :param ghost_controller: The object that coordinates ghost trainers
        :param seed: The random seed to use
        :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer
        :param init_path: Path from which to load model, if different from model_path.
        :return: The initialized trainer, wrapped in a GhostTrainer if self-play is enabled.
        """
        trainer_artifact_path = os.path.join(output_path, brain_name)
        if init_path is not None:
            trainer_settings.init_path = os.path.join(init_path, brain_name)

        min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name)

        trainer: Trainer = None  # type: ignore  # will be set to one of these, or raise
        trainer_type = trainer_settings.trainer_type

        if trainer_type == TrainerType.PPO:
            trainer = PPOTrainer(
                brain_name,
                min_lesson_length,
                trainer_settings,
                train_model,
                load_model,
                seed,
                trainer_artifact_path,
            )
        elif trainer_type == TrainerType.SAC:
            trainer = SACTrainer(
                brain_name,
                min_lesson_length,
                trainer_settings,
                train_model,
                load_model,
                seed,
                trainer_artifact_path,
            )
        else:
            raise TrainerConfigError(
                f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}'
            )

        if trainer_settings.self_play is not None:
            trainer = GhostTrainer(
                trainer,
                brain_name,
                ghost_controller,
                min_lesson_length,
                trainer_settings,
                train_model,
                trainer_artifact_path,
            )
        return trainer
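A short sketch of the settings that drive the branching above. TrainerSettings, TrainerType, and SelfPlaySettings are taken to live in mlagents.trainers.settings; treat the exact constructor arguments as assumptions:

from mlagents.trainers.settings import SelfPlaySettings, TrainerSettings, TrainerType

# Selects the SAC branch of the if/elif chain above.
settings = TrainerSettings(trainer_type=TrainerType.SAC)

# Any non-None self_play value makes _initialize_trainer wrap the
# SACTrainer in a GhostTrainer before returning it.
settings.self_play = SelfPlaySettings()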
Example #5
def initialize_trainer(
    trainer_settings: TrainerSettings,
    brain_name: str,
    run_id: str,
    output_path: str,
    train_model: bool,
    load_model: bool,
    ghost_controller: GhostController,
    seed: int,
    init_path: str = None,
    meta_curriculum: MetaCurriculum = None,
    multi_gpu: bool = False,
) -> Trainer:
    """
    Initializes a trainer given a provided trainer configuration and brain parameters, as well as
    some general training session options.

    :param trainer_settings: Original trainer configuration loaded from YAML
    :param brain_name: Name of the brain to be associated with trainer
    :param run_id: Run ID to associate with this training run
    :param output_path: Path to save the model and summary statistics
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param ghost_controller: The object that coordinates ghost trainers
    :param seed: The random seed to use
    :param init_path: Path from which to load model, if different from model_path.
    :param meta_curriculum: Optional meta_curriculum, used to determine a reward buffer length for PPOTrainer
    :return: The initialized trainer, wrapped in a GhostTrainer if self-play is enabled.
    """
    trainer_settings.output_path = os.path.join(output_path, brain_name)
    if init_path is not None:
        trainer_settings.init_path = os.path.join(init_path, brain_name)

    min_lesson_length = 1
    if meta_curriculum:
        if brain_name in meta_curriculum.brains_to_curricula:
            min_lesson_length = meta_curriculum.brains_to_curricula[brain_name].min_lesson_length
        else:
            logger.warning(
                f"Metacurriculum enabled, but no curriculum for brain {brain_name}. "
                f"Brains with curricula: {meta_curriculum.brains_to_curricula.keys()}. "
            )

    trainer: Trainer = None  # type: ignore  # will be set to one of these, or raise
    trainer_type = trainer_settings.trainer_type

    if trainer_type == TrainerType.PPO:
        trainer = PPOTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            run_id,
        )
    elif trainer_type == TrainerType.SAC:
        trainer = SACTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            run_id,
        )
    else:
        raise TrainerConfigError(
            f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}'
        )

    if trainer_settings.self_play is not None:
        trainer = GhostTrainer(
            trainer,
            brain_name,
            ghost_controller,
            min_lesson_length,
            trainer_settings,
            train_model,
            run_id,
        )
    return trainer
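A hedged usage sketch for this function. The brain name, run id, and paths are placeholders, and the no-argument GhostController constructor is an assumption:

trainer = initialize_trainer(
    trainer_settings=TrainerSettings(),  # trainer_type defaults to PPO
    brain_name="MockBrain",
    run_id="run1",
    output_path="results/run1",
    train_model=True,
    load_model=False,
    ghost_controller=GhostController(),
    seed=0,
)
# With default settings and self_play left unset, the return value is a
# plain PPOTrainer writing to results/run1/MockBrain.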