def test_measure_RL_performance_basics():
    """The wrapper should record one EpisodeMetrics entry per episode, keyed by
    the cumulative step count, when using the regular gym reset/step API."""
    env = DummyEnvironment(start=0, target=5, max_value=10)
    from sequoia.settings.active.continual.continual_rl_setting import \
        ContinualRLSetting
    # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations, actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards)
    env = MeasureRLPerformanceWrapper(env)
    env.seed(123)

    all_episode_rewards = []
    all_episode_steps = []

    for episode in range(5):
        episode_steps = 0
        episode_reward = 0
        obs = env.reset()
        print(f"Episode {episode}, obs: {obs}")
        done = False
        while not done:
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)
            episode_reward += reward
            episode_steps += 1
            # print(obs, reward, done, info)
        all_episode_steps.append(episode_steps)
        all_episode_rewards.append(episode_reward)

    from itertools import accumulate
    expected_metrics = {}
    for episode_steps, cumul_step, episode_reward in zip(
            all_episode_steps, accumulate(all_episode_steps), all_episode_rewards):
        expected_metrics[cumul_step] = EpisodeMetrics(
            n_samples=1,
            mean_episode_reward=episode_reward,
            mean_episode_length=episode_steps,
        )

    assert env.get_online_performance() == expected_metrics
def test_measure_RL_performance_iteration():
    """Same check as above, but iterating over the env as an EnvDataset
    (DataLoader-style iteration) and sending actions with `env.send`."""
    env = DummyEnvironment(start=0, target=5, max_value=10)
    from gym.wrappers import TimeLimit
    max_episode_steps = 50
    env = EnvDataset(env)
    env = TimeLimit(env, max_episode_steps=max_episode_steps)
    # env = TypedObjectsWrapper(env, observations_type=ContinualRLSetting.Observations, actions_type=ContinualRLSetting.Actions, rewards_type=ContinualRLSetting.Rewards)
    env = MeasureRLPerformanceWrapper(env)
    env.seed(123)

    all_episode_rewards = []
    all_episode_steps = []

    for episode in range(5):
        episode_steps = 0
        episode_reward = 0
        for step, obs in enumerate(env):
            print(f"Episode {episode}, obs: {obs}")
            action = env.action_space.sample()
            reward = env.send(action)
            episode_reward += reward
            episode_steps += 1
            # print(obs, reward, done, info)
            assert step <= max_episode_steps, "shouldn't be able to iterate longer than that."
        all_episode_steps.append(episode_steps)
        all_episode_rewards.append(episode_reward)

    # Import needed here as well: the `accumulate` import in the test above is local to that function.
    from itertools import accumulate
    expected_metrics = {}
    for episode_steps, cumul_step, episode_reward in zip(
            all_episode_steps, accumulate(all_episode_steps), all_episode_rewards):
        expected_metrics[cumul_step] = EpisodeMetrics(
            n_samples=1,
            mean_episode_reward=episode_reward,
            mean_episode_length=episode_steps,
        )

    assert env.get_online_performance() == expected_metrics