Example #1
def basic_trainer_controller():
    trainer_factory_mock = MagicMock()
    trainer_factory_mock.ghost_controller = GhostController()
    return TrainerController(
        trainer_factory=trainer_factory_mock,
        output_path="test_model_path",
        run_id="test_run_id",
        param_manager=EnvironmentParameterManager(),
        train=True,
        training_seed=99,
    )
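A minimal usage sketch for this helper; the attributes checked below (`run_id`, `trainer_factory`) are assumptions about TrainerController internals, not assertions taken from the original suite.

def test_basic_trainer_controller_wiring():
    tc = basic_trainer_controller()
    assert tc.run_id == "test_run_id"      # attribute name assumed
    assert tc.trainer_factory is not None  # the MagicMock factory from above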
Example #2
def test_curriculum_raises_all_completion_criteria_conversion():
    with pytest.warns(TrainerConfigWarning):
        run_options = RunOptions.from_dict(
            yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
        )

        param_manager = EnvironmentParameterManager(
            run_options.environment_parameters, 1337, False
        )
        assert param_manager.update_lessons(
            trainer_steps={"fake_behavior": 500},
            trainer_max_steps={"fake_behavior": 1000},
            trainer_reward_buffer={"fake_behavior": [1000] * 101},
        ) == (True, True)
        assert param_manager.update_lessons(
            trainer_steps={"fake_behavior": 500},
            trainer_max_steps={"fake_behavior": 1000},
            trainer_reward_buffer={"fake_behavior": [1000] * 101},
        ) == (True, True)
        assert param_manager.update_lessons(
            trainer_steps={"fake_behavior": 500},
            trainer_max_steps={"fake_behavior": 1000},
            trainer_reward_buffer={"fake_behavior": [1000] * 101},
        ) == (False, False)
        assert param_manager.get_current_lesson_number() == {"param_1": 2}
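For context, a hypothetical config of the shape that triggers the warning: every lesson, including the last one, declares `completion_criteria`. The real fixture is the `test_bad_curriculum_all_competion_criteria_config_yaml` string; this sketch only illustrates the structure.

# Illustrative structure only; see the real fixture string in the test module.
illustrative_all_completion_criteria_yaml = """
environment_parameters:
  param_1:
    curriculum:
      - name: Lesson1
        completion_criteria:
          measure: reward
          behavior: fake_behavior
          threshold: 30
        value: 1
      - name: Lesson2
        completion_criteria:   # criteria on the final lesson trigger the warning
          measure: reward
          behavior: fake_behavior
          threshold: 60
        value: 2
"""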
Example #3
def check_environment_trains(
    env,
    trainer_config,
    reward_processor=default_reward_processor,
    env_parameter_manager=None,
    success_threshold=0.9,
    env_manager=None,
    training_seed=None,
):
    if env_parameter_manager is None:
        env_parameter_manager = EnvironmentParameterManager()
    # Create controller and begin training.
    with tempfile.TemporaryDirectory() as dir:
        run_id = "id"
        seed = 1337 if training_seed is None else training_seed
        # Clear StatsReporters so we don't write to file
        StatsReporter.writers.clear()
        debug_writer = DebugWriter()
        StatsReporter.add_writer(debug_writer)
        if env_manager is None:
            env_manager = SimpleEnvManager(env, EnvironmentParametersChannel())
        trainer_factory = TrainerFactory(
            trainer_config=trainer_config,
            output_path=dir,
            train_model=True,
            load_model=False,
            seed=seed,
            param_manager=env_parameter_manager,
            multi_gpu=False,
        )

        tc = TrainerController(
            trainer_factory=trainer_factory,
            output_path=dir,
            run_id=run_id,
            param_manager=env_parameter_manager,
            train=True,
            training_seed=seed,
        )

        # Begin training
        tc.start_learning(env_manager)
        # For tests where we are just checking setup and not reward
        if success_threshold is not None:
            processed_rewards = [
                reward_processor(rewards)
                for rewards in env.final_rewards.values()
            ]
            assert all(not math.isnan(reward) for reward in processed_rewards)
            assert all(reward > success_threshold for reward in processed_rewards)
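Typical callers in the ml-agents tests pair this helper with a toy environment; a sketch assuming `SimpleEnvironment` from `mlagents.trainers.tests.simple_test_envs` and a per-behavior config dict (names may differ across versions):

env = SimpleEnvironment(["fake_behavior"])       # toy env; constructor assumed
config = {"fake_behavior": ppo_dummy_config()}   # TrainerSettings per behavior
check_environment_trains(env, config)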
Example #4
def test_handles_no_config_provided():
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors

    trainer_factory = trainer_util.TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
        param_manager=EnvironmentParameterManager(),
    )
    trainer_factory.generate(brain_name)
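This works because `RunOptions().behaviors` behaves like a defaultdict of `TrainerSettings`: looking up an unseen brain name yields default settings rather than raising. A quick sketch of that assumption:

# Sketch of the assumption behind the test above: `behaviors` acts like a
# defaultdict, so an unconfigured brain name yields default TrainerSettings.
settings = RunOptions().behaviors["brain_never_configured"]
assert settings.trainer_type == TrainerType.PPO  # default assumed to be PPO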
Example #5
def test_sac_trainer_update_normalization(sac_config):
    behavior_id_team0 = "test_brain?team=0"
    brain_name = BehaviorIdentifiers.from_name_behavior_id(
        behavior_id_team0).brain_name
    mock_specs = mb.setup_test_behavior_specs(True,
                                              False,
                                              vector_action_space=[2],
                                              vector_obs_space=1)
    base_config = sac_config.behaviors
    output_path = "results_dir"
    train_model = True
    load_model = False
    seed = 42
    trainer_factory = TrainerFactory(
        trainer_config=base_config,
        output_path=output_path,
        train_model=train_model,
        load_model=load_model,
        seed=seed,
        param_manager=EnvironmentParameterManager(),
    )
    sac_trainer = trainer_factory.generate(brain_name)
    parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(
        behavior_id_team0)
    policy = sac_trainer.create_policy(parsed_behavior_id0, mock_specs)
    sac_trainer.add_policy(parsed_behavior_id0, policy)
    trajectory_queue0 = AgentManagerQueue(behavior_id_team0)
    sac_trainer.subscribe_trajectory_queue(trajectory_queue0)
    time_horizon = 15
    trajectory = make_fake_trajectory(
        length=time_horizon,
        max_step_complete=True,
        observation_specs=create_observation_specs_with_shapes([(1, )]),
        action_spec=mock_specs.action_spec,
    )
    trajectory_queue0.put(trajectory)
    # mocking out update_normalization in both the policy and critic
    with patch(
            "mlagents.trainers.torch.networks.ValueNetwork.update_normalization"
    ) as optimizer_update_normalization_mock, patch(
            "mlagents.trainers.policy.torch_policy.TorchPolicy.update_normalization"
    ) as policy_update_normalization_mock:
        sac_trainer.advance()
        optimizer_update_normalization_mock.assert_called_once()
        policy_update_normalization_mock.assert_called_once()
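The `sac_config` fixture is assumed to provide a `RunOptions` whose single behavior uses the SAC trainer; a hypothetical version (the real fixture in the ml-agents tests may set more fields):

@pytest.fixture
def sac_config():
    # Hypothetical fixture; the real one in the ml-agents tests may differ.
    return RunOptions.from_dict(
        yaml.safe_load(
            """
            behaviors:
              test_brain:
                trainer_type: sac
            """
        )
    )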
Example #6
def test_handles_no_config_provided():
    """
    Make sure the trainer setup handles no configs provided at all.
    """
    brain_name = "testbrain"
    no_default_config = RunOptions().behaviors
    # Pretend this was created without a YAML file
    no_default_config.set_config_specified(False)

    trainer_factory = TrainerFactory(
        trainer_config=no_default_config,
        output_path="output_path",
        train_model=True,
        load_model=False,
        seed=42,
        param_manager=EnvironmentParameterManager(),
    )
    trainer_factory.generate(brain_name)
Example #7
def test_initialize_ppo_trainer(BehaviorSpecMock, dummy_config):
    brain_name = "testbrain"
    training_behaviors = {"testbrain": BehaviorSpecMock()}
    output_path = "results_dir"
    train_model = True
    load_model = False
    seed = 11
    expected_reward_buff_cap = 1

    base_config = dummy_config.behaviors
    expected_config = ppo_dummy_config()

    def mock_constructor(
        self,
        brain,
        reward_buff_cap,
        trainer_settings,
        training,
        load,
        seed,
        artifact_path,
    ):
        assert brain == brain_name
        assert trainer_settings == expected_config
        assert reward_buff_cap == expected_reward_buff_cap
        assert training == train_model
        assert load == load_model
        # The parameter shadows the outer `seed` variable (11), so the
        # original `assert seed == seed` was vacuous; compare the literal.
        assert seed == 11
        assert artifact_path == os.path.join(output_path, brain_name)

    with patch.object(PPOTrainer, "__init__", mock_constructor):
        trainer_factory = trainer_util.TrainerFactory(
            trainer_config=base_config,
            output_path=output_path,
            train_model=train_model,
            load_model=load_model,
            seed=seed,
            param_manager=EnvironmentParameterManager(),
        )
        trainers = {}
        for brain_name in training_behaviors.keys():
            trainers[brain_name] = trainer_factory.generate(brain_name)
        assert "testbrain" in trainers
        assert isinstance(trainers["testbrain"], PPOTrainer)
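The `patch.object(PPOTrainer, "__init__", mock_constructor)` trick swaps the real constructor for an argument-checking stub, so `generate` builds a `PPOTrainer` shell without running real initialization. A self-contained illustration of the same pattern:

from unittest.mock import patch

class Widget:
    def __init__(self, size: int):
        self.size = size

def checking_init(self, size: int) -> None:
    # Intercept constructor arguments; must return None like any __init__.
    assert size == 3

with patch.object(Widget, "__init__", checking_init):
    w = Widget(3)  # runs checking_init; no attributes get set
assert isinstance(w, Widget)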
Example #8
def run_training(run_seed: int, options: RunOptions) -> None:
    """
    Launches training session.
    :param options: parsed command line arguments
    :param run_seed: Random seed used for training.
    :param run_options: Command line arguments for training.
    """
    with hierarchical_timer("run_training.setup"):
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings
        base_path = "results"
        write_path = os.path.join(base_path, checkpoint_settings.run_id)
        maybe_init_path = (
            os.path.join(base_path, checkpoint_settings.initialize_from)
            if checkpoint_settings.initialize_from is not None
            else None
        )
        run_logs_dir = os.path.join(write_path, "run_logs")
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        validate_existing_directories(
            write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )

        # Configure Tensorboard Writers and StatsReporter
        tb_writer = TensorboardWriter(
            write_path, clear_past_data=not checkpoint_settings.resume
        )
        gauge_write = GaugeWriter()
        console_writer = ConsoleWriter()
        StatsReporter.add_writer(tb_writer)
        StatsReporter.add_writer(gauge_write)
        StatsReporter.add_writer(console_writer)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )
        engine_config = EngineConfig(
            width=engine_settings.width,
            height=engine_settings.height,
            quality_level=engine_settings.quality_level,
            time_scale=engine_settings.time_scale,
            target_frame_rate=engine_settings.target_frame_rate,
            capture_frame_rate=engine_settings.capture_frame_rate,
        )
        env_manager = SubprocessEnvManager(
            env_factory, engine_config, env_settings.num_envs
        )
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )

        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
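In the real CLI this function is reached through `mlagents-learn`; a minimal direct invocation sketch, assuming `config_text` holds a valid trainer configuration YAML (hypothetical here):

options = RunOptions.from_dict(yaml.safe_load(config_text))  # config_text assumed
run_training(run_seed=0, options=options)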
Example #9
def _initialize_trainer(
    trainer_settings: TrainerSettings,
    brain_name: str,
    output_path: str,
    train_model: bool,
    load_model: bool,
    ghost_controller: GhostController,
    seed: int,
    param_manager: EnvironmentParameterManager,
    init_path: Optional[str] = None,
    multi_gpu: bool = False,
) -> Trainer:
    """
    Initializes a trainer given a provided trainer configuration and brain parameters, as well as
    some general training session options.

    :param trainer_settings: Original trainer configuration loaded from YAML
    :param brain_name: Name of the brain to be associated with trainer
    :param output_path: Path to save the model and summary statistics
    :param train_model: Whether to train the model (vs. run inference)
    :param load_model: Whether to load the model or randomly initialize
    :param ghost_controller: The object that coordinates ghost trainers
    :param seed: The random seed to use
    :param param_manager: EnvironmentParameterManager, used to determine a reward buffer length for PPOTrainer
    :param init_path: Path from which to load model, if different from model_path.
    :return: The initialized trainer.
    """
    trainer_artifact_path = os.path.join(output_path, brain_name)
    if init_path is not None:
        trainer_settings.init_path = os.path.join(init_path, brain_name)

    min_lesson_length = param_manager.get_minimum_reward_buffer_size(brain_name)

    trainer: Trainer = None  # type: ignore  # will be set to one of these, or raise
    trainer_type = trainer_settings.trainer_type

    if trainer_type == TrainerType.PPO:
        trainer = PPOTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            trainer_artifact_path,
        )
    elif trainer_type == TrainerType.SAC:
        trainer = SACTrainer(
            brain_name,
            min_lesson_length,
            trainer_settings,
            train_model,
            load_model,
            seed,
            trainer_artifact_path,
        )
    else:
        raise TrainerConfigError(
            f'The trainer config contains an unknown trainer type "{trainer_type}" for brain {brain_name}'
        )

    if trainer_settings.self_play is not None:
        trainer = GhostTrainer(
            trainer,
            brain_name,
            ghost_controller,
            min_lesson_length,
            trainer_settings,
            train_model,
            trainer_artifact_path,
        )
    return trainer
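A hedged sketch of invoking this factory helper directly; relying on `TrainerSettings()` defaulting to the PPO trainer type is an assumption (consistent with Example #7's use of default configs):

trainer = _initialize_trainer(
    trainer_settings=TrainerSettings(),  # default trainer_type assumed to be PPO
    brain_name="testbrain",
    output_path="results_dir",
    train_model=True,
    load_model=False,
    ghost_controller=GhostController(),
    seed=42,
    param_manager=EnvironmentParameterManager(),
)
assert isinstance(trainer, PPOTrainer)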
Example #10
def test_create_manager():
    run_options = RunOptions.from_dict(yaml.safe_load(test_everything_config_yaml))
    param_manager = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, False
    )
    assert param_manager.get_minimum_reward_buffer_size("fake_behavior") == 100
    assert param_manager.get_current_lesson_number() == {
        "param_1": 0,
        "param_2": 0,
        "param_3": 0,
    }
    assert param_manager.get_current_samplers() == {
        "param_1": ConstantSettings(seed=1337, value=1),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }
    # Not enough episodes completed
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 99},
    ) == (False, False)
    # Not enough episodes reward
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1] * 101},
    ) == (False, False)
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 500},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [1000] * 101},
    ) == (True, True)
    assert param_manager.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }
    param_manager_2 = EnvironmentParameterManager(
        run_options.environment_parameters, 1337, restore=True
    )
    # The use of global status should make it so that the lesson numbers are maintained
    assert param_manager_2.get_current_lesson_number() == {
        "param_1": 1,
        "param_2": 0,
        "param_3": 0,
    }
    # No reset required
    assert param_manager.update_lessons(
        trainer_steps={"fake_behavior": 700},
        trainer_max_steps={"fake_behavior": 1000},
        trainer_reward_buffer={"fake_behavior": [0] * 101},
    ) == (True, False)
    assert param_manager.get_current_samplers() == {
        "param_1": UniformSettings(seed=1337 + 2, min_value=1, max_value=3),
        "param_2": GaussianSettings(seed=1337 + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=1337 + 3 + 1, value=20),
    }
Example #11
File: learn.py  Project: donlee90/ml-agents
def run_training(run_seed: int, options: RunOptions, num_areas: int) -> None:
    """
    Launches training session.
    :param run_seed: Random seed used for training.
    :param num_areas: Number of training areas to instantiate
    :param options: parsed command line arguments
    """
    with hierarchical_timer("run_training.setup"):
        torch_utils.set_torch_config(options.torch_settings)
        checkpoint_settings = options.checkpoint_settings
        env_settings = options.env_settings
        engine_settings = options.engine_settings

        run_logs_dir = checkpoint_settings.run_logs_dir
        port: Optional[int] = env_settings.base_port
        # Check if directory exists
        validate_existing_directories(
            checkpoint_settings.write_path,
            checkpoint_settings.resume,
            checkpoint_settings.force,
            checkpoint_settings.maybe_init_path,
        )
        # Make run logs directory
        os.makedirs(run_logs_dir, exist_ok=True)
        # Load any needed states in case of resume
        if checkpoint_settings.resume:
            GlobalTrainingStatus.load_state(
                os.path.join(run_logs_dir, "training_status.json")
            )
        # In case of initialization, set full init_path for all behaviors
        elif checkpoint_settings.maybe_init_path is not None:
            setup_init_path(options.behaviors, checkpoint_settings.maybe_init_path)

        # Configure Tensorboard Writers and StatsReporter
        stats_writers = register_stats_writer_plugins(options)
        for sw in stats_writers:
            StatsReporter.add_writer(sw)

        if env_settings.env_path is None:
            port = None
        env_factory = create_environment_factory(
            env_settings.env_path,
            engine_settings.no_graphics,
            run_seed,
            num_areas,
            port,
            env_settings.env_args,
            os.path.abspath(run_logs_dir),  # Unity environment requires absolute path
        )

        env_manager = SubprocessEnvManager(env_factory, options, env_settings.num_envs)
        env_parameter_manager = EnvironmentParameterManager(
            options.environment_parameters, run_seed, restore=checkpoint_settings.resume
        )

        trainer_factory = TrainerFactory(
            trainer_config=options.behaviors,
            output_path=checkpoint_settings.write_path,
            train_model=not checkpoint_settings.inference,
            load_model=checkpoint_settings.resume,
            seed=run_seed,
            param_manager=env_parameter_manager,
            init_path=checkpoint_settings.maybe_init_path,
            multi_gpu=False,
        )
        # Create controller and begin training.
        tc = TrainerController(
            trainer_factory,
            checkpoint_settings.write_path,
            checkpoint_settings.run_id,
            env_parameter_manager,
            not checkpoint_settings.inference,
            run_seed,
        )

    # Begin training
    try:
        tc.start_learning(env_manager)
    finally:
        env_manager.close()
        write_run_options(checkpoint_settings.write_path, options)
        write_timing_tree(run_logs_dir)
        write_training_status(run_logs_dir)
Example #12
def test_curriculum_no_behavior():
    with pytest.raises(TypeError):
        run_options = RunOptions.from_dict(
            yaml.safe_load(test_curriculum_no_behavior_yaml))
        EnvironmentParameterManager(run_options.environment_parameters, 1337,
                                    False)
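The fixture presumably describes a curriculum whose `completion_criteria` omits the required `behavior` key, which the attrs-based settings classes reject with a `TypeError`. An illustrative shape only (the real `test_curriculum_no_behavior_yaml` string may differ):

illustrative_no_behavior_yaml = """
environment_parameters:
  param_1:
    curriculum:
      - name: Lesson1
        completion_criteria:
          measure: reward      # no `behavior:` key here -> TypeError
          threshold: 30
        value: 1
"""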