Example #1
    def set_reward_from_config(self, env_name: str, env_config: dict):
        """Build and set the reward function from environment configurations.

        Args:
            env_name: the environment's id
            env_config: the environment's configuration
        """
        self.reward_fn = get_reward_fn(env_name, env_config)
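Example #5 below calls the resulting function as policy.reward_fn(obs, act, new_obs) on batched tensors, so get_reward_fn presumably returns a batched (obs, act, new_obs) -> reward callable. A minimal, self-contained sketch of that interface with a stand-in reward function (the shapes and the toy reward are hypothetical, not the library's own):

import torch

def dummy_reward_fn(obs, act, new_obs):
    # Toy reward: progress along the first observation feature minus an action cost.
    return (new_obs[..., 0] - obs[..., 0]) - 0.1 * act.pow(2).sum(dim=-1)

batch_size, obs_dim, act_dim = 10, 17, 6
obs = torch.randn(batch_size, obs_dim)
act = torch.randn(batch_size, act_dim)
new_obs = torch.randn(batch_size, obs_dim)
rewards = dummy_reward_fn(obs, act, new_obs)
assert rewards.shape == (batch_size,)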
Example #2
def env_reward(request, envs, env_config):
    env_name = request.param
    if "HalfCheetah" in env_name:
        env_config["exclude_current_positions_from_observation"] = False
    if "IndustrialBenchmark" in env_name:
        env_config["max_episode_steps"] = 200

    env = envs[env_name](env_config)
    reward_fn = get_reward_fn(env_name, env_config)
    return env, reward_fn
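The env_reward functions in Examples #2-#4 and #8 read request.param, which suggests they are parametrized pytest fixtures returning an (env, reward_fn) pair. A minimal, self-contained sketch of that pattern; the environment id and the stand-in factories below are hypothetical, not the project's own:

import pytest

@pytest.fixture(params=["DummyEnv-v0"])
def env_reward_sketch(request):
    env_name = request.param
    env = object()  # stand-in for envs[env_name](env_config)
    reward_fn = lambda obs, act, new_obs: 0.0  # stand-in for get_reward_fn(env_name, env_config)
    return env, reward_fn

def test_env_reward_sketch(env_reward_sketch):
    env, reward_fn = env_reward_sketch
    assert callable(reward_fn)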
Example #3
def env_reward(request, envs, env_config):
    env_name: str = request.param
    if env_name.endswith("-v3"):
        kwargs = dict(exclude_current_positions_from_observation=False)
        env_config["kwargs"] = kwargs
    if "IndustrialBenchmark" in env_name:
        env_config["max_episode_steps"] = 200

    env = envs[env_name](env_config)
    reward_fn = get_reward_fn(env_name, env_config)
    return env, reward_fn
Example #4
def env_reward(request, envs, env_config):
    env_name = request.param
    if any(
        prefix in env_name for prefix in "HalfCheetah Walker2d Swimmer Hopper".split()
    ):
        env_config["exclude_current_positions_from_observation"] = False
    if "IndustrialBenchmark" in env_name:
        env_config["max_episode_steps"] = 200

    env = envs[env_name](env_config)
    reward_fn = get_reward_fn(env_name, env_config)
    return env, reward_fn
Example #5
def test_set_reward_from_config(policy, envs):  # pylint:disable=unused-argument
    obs_space, action_space = policy.observation_space, policy.action_space
    batch_size = 10
    obs = fake_space_samples(obs_space, batch_size=batch_size)
    act = fake_space_samples(action_space, batch_size=batch_size)
    new_obs = fake_space_samples(obs_space, batch_size=batch_size)
    obs, act, new_obs = map(policy.convert_to_tensor, (obs, act, new_obs))

    policy.set_reward_from_config("MockEnv", {})

    original_fn = get_reward_fn("MockEnv", {})
    expected_rew = original_fn(obs, act, new_obs)
    rew = policy.reward_fn(obs, act, new_obs)

    assert torch.allclose(rew, expected_rew)
Example #6
    def set_reward_from_config(self):
        """Build and set the reward function from environment configurations."""
        env_id, env_config = self.config["env"], self.config["env_config"]
        self.reward_fn = envs.get_reward_fn(env_id, env_config)
        self._set_reward_hook()
Example #7
def reward_fn(env_name, env_config, size):
    base = envs.get_reward_fn(env_name, env_config)
    wrapped = LinearRedundant.wrap_env_function(base, size)
    return wrapped
Example #8
def env_reward(request, envs, env_config):
    env_name = request.param
    env = envs[env_name](env_config)
    reward_fn = get_reward_fn(env_name, env_config)
    return env, reward_fn
Example #9
def reward_fn(env_name, env_config, size):
    base = envs.get_reward_fn(env_name, env_config)
    wrapped = CorrelatedIrrelevant.wrap_env_function(base, size)
    return wrapped
Example #10
def reward_fn(env_name, size, env_config):
    base = envs.get_reward_fn(env_name, env_config)
    wrapped = RandomIrrelevant.wrap_env_function(base, size)
    return wrapped
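Examples #7, #9 and #10 follow the same pattern: fetch the base reward function for the environment, then pass it through a wrapper class's wrap_env_function(base, size) to obtain a callable with the same (obs, act, new_obs) interface. A generic sketch of such a wrapper; slicing off `size` trailing observation features before delegating is a hypothetical illustration, not raylab's actual logic:

import torch

def wrap_env_function_sketch(base_fn, size):
    # Return a reward callable with the same interface as base_fn, here assuming the
    # wrapped environment appends `size` extra features to each observation.
    def wrapped(obs, act, new_obs):
        return base_fn(obs[..., :-size], act, new_obs[..., :-size])
    return wrapped

base = lambda obs, act, new_obs: -act.pow(2).sum(dim=-1)  # toy base reward
wrapped = wrap_env_function_sketch(base, size=2)
obs, act = torch.randn(4, 10), torch.randn(4, 3)
assert wrapped(obs, act, obs).shape == (4,)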