def set_reward_from_config(self, env_name: str, env_config: dict):
    """Build and set the reward function from environment configurations.

    Args:
        env_name: the environment's id
        env_config: the environment's configuration
    """
    self.reward_fn = get_reward_fn(env_name, env_config)
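Usage note: the callable returned by get_reward_fn is assumed to take batched (obs, act, new_obs) tensors and return one reward per transition, matching the test later in this section. A minimal runnable sketch of that contract, using a hypothetical stand-in registry (_REWARD_FNS and the toy reward are illustrative, not the library's actual code):

import torch

# Hypothetical registry mapping env ids to batched reward callables
_REWARD_FNS = {
    "MockEnv": lambda obs, act, new_obs: new_obs.sum(dim=-1)
    - 0.01 * act.pow(2).sum(dim=-1),
}

def get_reward_fn(env_name: str, env_config: dict):
    """Stand-in: look up a reward callable by environment id."""
    return _REWARD_FNS[env_name]

obs = torch.randn(10, 4)      # batch of observations
act = torch.randn(10, 2)      # batch of actions
new_obs = torch.randn(10, 4)  # batch of next observations
rew = get_reward_fn("MockEnv", {})(obs, act, new_obs)
assert rew.shape == (10,)     # one scalar reward per transition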
def env_reward(request, envs, env_config): env_name = request.param if "HalfCheetah" in env_name: env_config["exclude_current_positions_from_observation"] = False if "IndustrialBenchmark" in env_name: env_config["max_episode_steps"] = 200 env = envs[env_name](env_config) reward_fn = get_reward_fn(env_name, env_config) return env, reward_fn
def env_reward(request, envs, env_config):
    env_name: str = request.param
    if env_name.endswith("-v3"):
        kwargs = dict(exclude_current_positions_from_observation=False)
        env_config["kwargs"] = kwargs
    if "IndustrialBenchmark" in env_name:
        env_config["max_episode_steps"] = 200
    env = envs[env_name](env_config)
    reward_fn = get_reward_fn(env_name, env_config)
    return env, reward_fn
def env_reward(request, envs, env_config):
    env_name = request.param
    if any(
        prefix in env_name
        for prefix in "HalfCheetah Walker2d Swimmer Hopper".split()
    ):
        env_config["exclude_current_positions_from_observation"] = False
    if "IndustrialBenchmark" in env_name:
        env_config["max_episode_steps"] = 200
    env = envs[env_name](env_config)
    reward_fn = get_reward_fn(env_name, env_config)
    return env, reward_fn
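For reference, a hedged sketch of how fixtures like the env_reward variants above are typically parametrized and consumed under pytest. ToyEnv, its id, and the placeholder reward are hypothetical; the real suites draw request.param from their own environment registries:

import pytest

class ToyEnv:
    # Hypothetical environment used only for this sketch
    def __init__(self, config):
        self.config = config

@pytest.fixture(params=["ToyEnv-v0"])
def env_reward(request):
    # Mirrors the fixtures above: build the env and its reward function
    # for the parametrized env id, then hand both to the test
    env = ToyEnv({"max_episode_steps": 200})
    reward_fn = lambda obs, act, new_obs: 0.0  # placeholder reward
    return env, reward_fn

def test_reward_fn_is_callable(env_reward):
    env, reward_fn = env_reward
    assert callable(reward_fn)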
def test_set_reward_from_config(policy, envs):
    # pylint:disable=unused-argument
    obs_space, action_space = policy.observation_space, policy.action_space
    batch_size = 10
    obs = fake_space_samples(obs_space, batch_size=batch_size)
    act = fake_space_samples(action_space, batch_size=batch_size)
    new_obs = fake_space_samples(obs_space, batch_size=batch_size)
    obs, act, new_obs = map(policy.convert_to_tensor, (obs, act, new_obs))

    policy.set_reward_from_config("MockEnv", {})

    original_fn = get_reward_fn("MockEnv", {})
    expected_rew = original_fn(obs, act, new_obs)
    rew = policy.reward_fn(obs, act, new_obs)
    assert torch.allclose(rew, expected_rew)
def set_reward_from_config(self): """Build and set the reward function from environment configurations.""" env_id, env_config = self.config["env"], self.config["env_config"] self.reward_fn = envs.get_reward_fn(env_id, env_config) self._set_reward_hook()
def reward_fn(env_name, env_config, size):
    base = envs.get_reward_fn(env_name, env_config)
    wrapped = LinearRedundant.wrap_env_function(base, size)
    return wrapped
def env_reward(request, envs, env_config):
    env_name = request.param
    env = envs[env_name](env_config)
    reward_fn = get_reward_fn(env_name, env_config)
    return env, reward_fn
def reward_fn(env_name, env_config, size):
    base = envs.get_reward_fn(env_name, env_config)
    wrapped = CorrelatedIrrelevant.wrap_env_function(base, size)
    return wrapped
def reward_fn(env_name, size, env_config):
    base = envs.get_reward_fn(env_name, env_config)
    wrapped = RandomIrrelevant.wrap_env_function(base, size)
    return wrapped
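The three fixtures above share the wrap_env_function pattern: a wrapper environment appends extra (redundant or irrelevant) observation features, and the wrapped reward function must account for them before delegating to the base reward. A self-contained sketch of one plausible implementation, assuming the extra `size` features are appended last and should be stripped; the code below is illustrative, not LinearRedundant's actual implementation:

import torch

def wrap_env_function(base_fn, size):
    # Assumption: the wrapper env appended `size` features at the end of
    # each observation; strip them before calling the base reward
    def wrapped(obs, act, new_obs):
        return base_fn(obs[..., :-size], act, new_obs[..., :-size])
    return wrapped

base = lambda obs, act, new_obs: new_obs.sum(dim=-1)  # toy base reward
reward_fn = wrap_env_function(base, size=2)

obs = torch.randn(10, 6)      # 4 original features + 2 appended ones
act = torch.randn(10, 2)
new_obs = torch.randn(10, 6)
assert reward_fn(obs, act, new_obs).shape == (10,)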