コード例 #1
0
def test_curriculum_raises_all_completion_criteria_conversion():
    """Exercise a curriculum config that must trigger ``TrainerConfigWarning``.

    The config is loaded, a manager is built from it, and the lessons are
    updated three times with identical trainer statistics. The first two
    updates are expected to return ``(True, True)``, the third
    ``(False, False)``, leaving ``param_1`` at lesson 2.
    """
    expected_outcomes = [(True, True), (True, True), (False, False)]

    # The whole scenario runs inside the warning check, so the warning may be
    # emitted by any of the calls below.
    with pytest.warns(TrainerConfigWarning):
        config = yaml.safe_load(test_bad_curriculum_all_competion_criteria_config_yaml)
        options = RunOptions.from_dict(config)
        manager = EnvironmentParameterManager(
            options.environment_parameters, 1337, False
        )

        for expected in expected_outcomes:
            # Fresh argument dicts are built for every call.
            outcome = manager.update_lessons(
                trainer_steps={"fake_behavior": 500},
                trainer_max_steps={"fake_behavior": 1000},
                trainer_reward_buffer={"fake_behavior": [1000] * 101},
            )
            assert outcome == expected

        lesson_numbers = manager.get_current_lesson_number()
        assert lesson_numbers == {"param_1": 2}
コード例 #2
0
def test_create_manager():
    """Walk an ``EnvironmentParameterManager`` through a lesson update.

    Checks the initial lesson numbers and samplers, verifies that updates with
    too few or too low rewards return ``(False, False)``, that a sufficient
    reward buffer returns ``(True, True)`` and moves ``param_1`` to lesson 1,
    that a second manager created with ``restore=True`` reports the same
    lesson numbers, and finally the samplers after one more update.
    """
    seed = 1337
    behavior = "fake_behavior"

    def advance(manager, steps, rewards):
        # Single place for the update call; only steps and rewards vary.
        return manager.update_lessons(
            trainer_steps={behavior: steps},
            trainer_max_steps={behavior: 1000},
            trainer_reward_buffer={behavior: rewards},
        )

    config = yaml.safe_load(test_everything_config_yaml)
    options = RunOptions.from_dict(config)
    manager = EnvironmentParameterManager(options.environment_parameters, seed, False)

    buffer_size = manager.get_minimum_reward_buffer_size(behavior)
    assert buffer_size == 100

    lessons = manager.get_current_lesson_number()
    assert lessons == {"param_1": 0, "param_2": 0, "param_3": 0}

    samplers = manager.get_current_samplers()
    assert samplers == {
        "param_1": ConstantSettings(seed=seed, value=1),
        "param_2": GaussianSettings(seed=seed + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=seed + 3 + 1, value=20),
    }

    # Not enough episodes completed: 99 entries, below the buffer size of 100.
    outcome = advance(manager, 500, [1000] * 99)
    assert outcome == (False, False)

    # Enough episodes, but the rewards are too low.
    outcome = advance(manager, 500, [1] * 101)
    assert outcome == (False, False)

    # Enough episodes with high rewards: param_1 moves on to lesson 1.
    outcome = advance(manager, 500, [1000] * 101)
    assert outcome == (True, True)
    lessons = manager.get_current_lesson_number()
    assert lessons == {"param_1": 1, "param_2": 0, "param_3": 0}

    restored_manager = EnvironmentParameterManager(
        options.environment_parameters, seed, restore=True
    )
    # The use of global status should make it so that the lesson numbers are
    # maintained by a manager created with restore=True.
    restored_lessons = restored_manager.get_current_lesson_number()
    assert restored_lessons == {"param_1": 1, "param_2": 0, "param_3": 0}

    # No reset required (presumably what the second flag of the result means).
    outcome = advance(manager, 700, [0] * 101)
    assert outcome == (True, False)

    samplers = manager.get_current_samplers()
    assert samplers == {
        "param_1": UniformSettings(seed=seed + 2, min_value=1, max_value=3),
        "param_2": GaussianSettings(seed=seed + 3, mean=4, st_dev=5),
        "param_3": ConstantSettings(seed=seed + 3 + 1, value=20),
    }