def test_step_limit_with_single_env_dataset(env_name: str):
    """ObservationLimit closes the wrapped env after `max_steps` observations.

    After the limit is hit, any further interaction (reset, step, iteration)
    must raise ClosedEnvironmentError.

    NOTE(review): the original body called `gym.make(env_name)` and then
    immediately overwrote the result with a DummyEnvironment, leaking the
    created env. That dead call is removed; `env_name` is kept only so
    parametrized callers keep working.
    """
    start = 0
    target = 10
    env = DummyEnvironment(start=start, target=target, max_value=target * 2)
    env = EnvDataset(env)
    max_steps = 5
    env = ObservationLimit(env, max_steps=max_steps)
    env.seed(123)

    values = []
    # The range(100) bound is intentionally larger than max_steps: the limit
    # wrapper, not the zip, must be what stops the iteration.
    for i, obs in zip(range(100), env):
        values.append(obs)
        _ = env.send(1)
    assert values == list(range(start, max_steps))
    assert env.is_closed

    # Every form of interaction with the closed env must now raise.
    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.reset()
    with pytest.raises(gym.error.ClosedEnvironmentError):
        env.step(env.action_space.sample())
    with pytest.raises(gym.error.ClosedEnvironmentError):
        for i, _ in zip(range(5), env):
            assert False
def test_step_normally_works_fine():
    """An EnvDataset-wrapped DummyEnvironment steps like a plain gym env."""
    env = EnvDataset(DummyEnvironment())
    env.seed(123)
    assert env.reset() == 0

    # (action, expected (obs, reward, done, info)) pairs, applied in order.
    transitions = [
        (0, (0, 5, False, {})),
        (1, (1, 4, False, {})),
        (1, (2, 3, False, {})),
        (2, (1, 4, False, {})),
        (1, (2, 3, False, {})),
        (1, (3, 2, False, {})),
        (1, (4, 1, False, {})),
        (1, (5, 0, True, {})),
    ]
    for action, expected in transitions:
        assert env.step(action) == expected

    # After the episode ends, reset starts a fresh episode from the start.
    env.reset()
    assert env.step(0) == (0, 5, False, {})
def test_doesnt_raise_error_when_action_sent():
    """Iterating an EnvDataset does not raise as long as an action is sent
    back for every observation that gets yielded.
    """
    env = DummyEnvironment()
    with EnvDataset(env) as env:
        env.reset()
        env.seed(123)
        for i, obs in zip(range(5), env):
            assert obs in env.observation_space
            # Sending an action lets iteration continue; the returned reward
            # was previously bound to an unused local — now discarded.
            env.send(env.action_space.sample())
def test_raise_error_when_missing_action():
    """Iterating without sending an action back must raise a RuntimeError."""
    with EnvDataset(DummyEnvironment()) as env:
        env.reset()
        env.seed(123)
        with pytest.raises(RuntimeError):
            # Never calling `send` between observations triggers the error.
            for _step, _observation in zip(range(5), env):
                pass
def test_measure_RL_performance_basics():
    """MeasureRLPerformanceWrapper records one EpisodeMetrics per episode,
    keyed by the cumulative step count at which that episode ended.

    NOTE(review): the original imported ContinualRLSetting only for a
    commented-out TypedObjectsWrapper line; both the dead import and the
    commented-out code are removed.
    """
    from itertools import accumulate

    env = DummyEnvironment(start=0, target=5, max_value=10)
    env = MeasureRLPerformanceWrapper(env)
    env.seed(123)

    all_episode_rewards = []
    all_episode_steps = []
    for episode in range(5):
        episode_steps = 0
        episode_reward = 0
        obs = env.reset()
        print(f"Episode {episode}, obs: {obs}")
        done = False
        while not done:
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            episode_reward += reward
            episode_steps += 1
        all_episode_steps.append(episode_steps)
        all_episode_rewards.append(episode_reward)

    # Rebuild the expected mapping: cumulative step at episode end ->
    # single-sample metrics for that episode.
    expected_metrics = {}
    for episode_steps, cumul_step, episode_reward in zip(
        all_episode_steps, accumulate(all_episode_steps), all_episode_rewards
    ):
        expected_metrics[cumul_step] = EpisodeMetrics(
            n_samples=1,
            mean_episode_reward=episode_reward,
            mean_episode_length=episode_steps,
        )
    assert env.get_online_performance() == expected_metrics