예제 #1
0
    def test_config(normalization_config):
        """Check a normalization-wrapped CartPole env built from the given config.

        Wraps a fresh ``GymMazeEnv("CartPole-v0")`` in an
        ``ObservationNormalizationWrapper`` and asserts that the bounds of the
        ``"observation"`` space lie within [0, 1], that the statistics are
        mean 0 / std 1, and that the values returned by ``random_env_steps``
        stay within [0, 1].

        :param normalization_config: Mapping providing the keys
            ``default_strategy``, ``default_strategy_config``,
            ``default_statistics``, ``statistics_dump``, ``sampling_policy``,
            ``exclude`` and ``manual_config``.
        """
        # init environment
        env = GymMazeEnv("CartPole-v0")

        # wrap env with observation normalization
        env = ObservationNormalizationWrapper(
            env,
            default_strategy=normalization_config["default_strategy"],
            default_strategy_config=normalization_config[
                "default_strategy_config"],
            default_statistics=normalization_config["default_statistics"],
            statistics_dump=normalization_config["statistics_dump"],
            sampling_policy=normalization_config['sampling_policy'],
            exclude=normalization_config["exclude"],
            manual_config=normalization_config["manual_config"])

        # check if observation space clipping was applied
        # (np.all replaces np.alltrue, which was removed in NumPy 2.0)
        assert np.all(env.observation_space["observation"].high <= 1.0)
        assert np.all(env.observation_space["observation"].low >= 0.0)

        # check if stats have been set properly
        statistics = env.get_statistics()
        assert np.all(statistics["observation"]["mean"] == np.zeros(shape=4))
        assert np.all(statistics["observation"]["std"] == np.ones(shape=4))

        # test sampling
        obs = random_env_steps(env, steps=100)
        assert np.min(obs) >= 0 and np.max(obs) <= 1
예제 #2
0
def test_observation_normalization_manual_default_stats():
    """Test observation normalization with manually provided default statistics.

    Builds a mean-zero/std-one normalization config with a clip range of
    (0, 1) and explicit default statistics, wraps a CartPole env with it and
    asserts that the bounds of the ``"observation"`` space lie within [0, 1],
    that the wrapper reports the provided statistics, and that the values
    returned by ``random_env_steps`` stay within [0, 1].
    """

    # init environment
    env = GymMazeEnv("CartPole-v0")

    # normalization config; the keys mirror the wrapper's keyword arguments
    normalization_config = {
        "default_strategy":
        "maze.normalization_strategies.MeanZeroStdOneObservationNormalizationStrategy",
        "default_strategy_config": {
            "clip_range": (0, 1),
            "axis": 0
        },
        "default_statistics": {
            "mean": [0, 0, 0, 0],
            "std": [1, 1, 1, 1]
        },
        "statistics_dump": "statistics.pkl",
        "sampling_policy": RandomPolicy(env.action_spaces_dict),
        "exclude": None,
        "manual_config": None,
    }

    # wrap env with observation normalization
    env = ObservationNormalizationWrapper(env, **normalization_config)

    # check if observation space clipping was applied
    # (np.all replaces np.alltrue, which was removed in NumPy 2.0)
    assert np.all(env.observation_space["observation"].high <= 1.0)
    assert np.all(env.observation_space["observation"].low >= 0.0)

    # check if stats have been set properly
    statistics = env.get_statistics()
    assert np.all(statistics["observation"]["mean"] == np.zeros(shape=4))
    assert np.all(statistics["observation"]["std"] == np.ones(shape=4))

    # test sampling
    obs = random_env_steps(env, steps=100)
    assert np.min(obs) >= 0 and np.max(obs) <= 1
예제 #3
0
def test_observation_normalization_init_from_yaml_config():
    """Build the normalization wrapper from a YAML config and inspect it.

    Loads ``dummy_config_file.yml``, passes its
    ``observation_normalization_wrapper`` section as keyword arguments to the
    wrapper and checks the resulting statistics, strategy type and clip
    limits for the ``"observation"`` key.
    """

    # read the wrapper arguments from the dummy YAML file
    yaml_config = load_env_config(test_observation_normalization_module,
                                  "dummy_config_file.yml")

    # create the environment and wrap it
    base_env = GymMazeEnv("CartPole-v0")
    env = ObservationNormalizationWrapper(
        base_env, **yaml_config["observation_normalization_wrapper"])
    assert isinstance(env, ObservationNormalizationWrapper)

    # both statistics from the config must be reported by the wrapper
    observation_stats = env.get_statistics()["observation"]
    assert "stat_1" in observation_stats and "stat_2" in observation_stats

    # inspect the strategy instantiated for the "observation" key
    strategy = getattr(env, "_normalization_strategies")["observation"]
    assert isinstance(strategy, ObservationNormalizationStrategy)
    assert strategy._clip_min == 0
    assert strategy._clip_max == 1

    # the strategy must hold the statistics values from the config
    expected_statistics = (("stat_1", [0, 0, 0, 0]), ("stat_2", [1, 1, 1, 1]))
    for stat_name, expected in expected_statistics:
        assert np.all(strategy._statistics[stat_name] == np.asarray(expected))
예제 #4
0
def test_observation_normalization_pipeline():
    """Test the full pipeline: estimate, dump and reload normalization statistics.

    Steps:
      1. Wrap a CartPole env without default statistics and verify that no
         statistics are available and that stepping raises an AssertionError.
      2. Estimate statistics via ``obtain_normalization_statistics`` and check
         that every observation key has numpy-array statistics.
      3. Verify that the statistics dump file was written.
      4. Wrap a second env with the same config and verify that it reports
         the same statistics as the first one and that stepping works.
    """

    env = GymMazeEnv("CartPole-v0")

    # normalization config; the keys mirror the wrapper's keyword arguments
    normalization_config = {
        "default_strategy":
        "maze.normalization_strategies.RangeZeroOneObservationNormalizationStrategy",
        "default_strategy_config": {
            "clip_range": (None, None),
            "axis": 0
        },
        "default_statistics": None,
        "sampling_policy": RandomPolicy(env.action_spaces_dict),
        "statistics_dump": "statistics.pkl",
        "exclude": None,
        "manual_config": None
    }
    dump_file = normalization_config["statistics_dump"]

    # The second half of this test relies on a wrapper picking up statistics
    # from an existing dump file, so a file left over from an earlier run
    # would break the "no statistics yet" precondition below. Remove it first.
    if os.path.exists(dump_file):
        os.remove(dump_file)

    # wrap env with observation normalization
    env = ObservationNormalizationWrapper(env, **normalization_config)

    # check statistics: nothing has been estimated yet
    statistics = env.get_statistics()
    assert statistics["observation"] is None, statistics

    # check that assertion is thrown when stepping without statistics
    with pytest.raises(AssertionError):
        random_env_steps(env, steps=1)

    # estimate normalization statistics
    statistics = obtain_normalization_statistics(env, n_samples=1000)

    # check statistics: every observation of every sub-step must be covered
    for sub_step_key in env.observation_spaces_dict:
        for obs_key in env.observation_spaces_dict[sub_step_key].spaces:
            assert obs_key in statistics
            for stats_key in statistics[obs_key]:
                stats = statistics[obs_key][stats_key]
                assert isinstance(stats, np.ndarray)

    # test normalization
    random_env_steps(env, steps=100)

    # test file dump and loading
    statistics_copy = copy.deepcopy(env.get_statistics())
    assert os.path.exists(dump_file)

    # wrap a fresh env with the same observation normalization config
    env = GymMazeEnv("CartPole-v0")
    env = ObservationNormalizationWrapper(env, **normalization_config)

    # check if stats loading worked properly; comparing the key sets first
    # keeps the value comparison below from passing vacuously
    statistics = env.get_statistics()
    assert set(statistics) == set(statistics_copy)
    for obs_key in statistics:
        for stats_key in statistics[obs_key]:
            assert np.all(statistics[obs_key][stats_key] ==
                          statistics_copy[obs_key][stats_key])

    # check if stepping works
    random_env_steps(env, steps=100)