def test_instantiation_with_wrapper_factory():
    """Instantiate a wrapped environment from config and verify wrapper types and arguments."""
    factory = WrapperFactory()
    config = load_env_config(dummy_wrappers_module, "dummy_env_config_with_dummy_wrappers.yml")
    env_kwargs = config['env']
    env_kwargs["core_env"] = {
        "_target_": DummyCoreEnvironment,
        "observation_space": ObservationConversion().space(),
    }

    env: Wrapper[MazeEnv] = factory.wrap_from_config(
        DummyEnvironment(**env_kwargs), config['wrappers'])

    # The wrapped env must expose every wrapper class plus the inner env type.
    for expected_type in (Wrapper, DummyWrapper, DummyWrapperA, DummyWrapperB, DummyEnvironment):
        assert isinstance(env, expected_type)

    # Wrapper arguments and methods must be reachable through the wrapper stack.
    for attr in ("do_stuff", "arg_a", "arg_b", "arg_c"):
        assert getattr(env, attr)
    assert env.do_stuff() == "b"
def build_dummy_maze_environment() -> DummyEnvironment:
    """Build a dummy Maze environment with default conversion interfaces.

    :return: Instance of a DummyEnvironment.
    """
    obs_conversion = ObservationConversion()
    return DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conversion.space()),
        action_conversion=[DictDiscreteActionConversion()],
        observation_conversion=[obs_conversion],
    )
def test_reward_clipping_wrapper():
    """The reward clipping wrapper must bound step rewards to [min_val, max_val]."""
    obs_conversion = ObservationConversion()
    env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conversion],
    )
    env.reset()
    action = env.action_space.sample()

    np.random.seed(1234)
    clipped_env = RewardClippingWrapper(env, min_val=-0.1, max_val=0.1)
    # Re-seed so the clipped step sees the same random sequence.
    np.random.seed(1234)
    clipped_reward = clipped_env.step(action)[1]
    assert -0.1 <= clipped_reward <= 0.1
def test_sorted_observation_spaces_wrapper():
    """The sorting wrapper must order the observation-space keys alphabetically."""
    obs_conversion = ObservationConversion()
    env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conversion],
    )
    env = AddObservationWrapper.wrap(env)
    env = SortedSpacesWrapper.wrap(env)

    space_keys = list(env.observation_space.spaces.keys())
    assert space_keys == sorted(space_keys)

    # State/action conversion must still work through the wrapper stack.
    _, _ = env.get_observation_and_action_dicts({}, env.last_maze_action, first_step_in_episode=True)
def test_reward_scaling_wrapper():
    """The scaling wrapper must multiply the original reward by the given factor."""
    obs_conversion = ObservationConversion()
    env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conversion],
    )
    env.reset()
    action = env.action_space.sample()

    # Step once unscaled, then once scaled, with identical random sequences.
    np.random.seed(1234)
    raw_reward = env.step(action)[1]

    scaled_env = RewardScalingWrapper(env, scale=0.1)
    np.random.seed(1234)
    scaled_reward = scaled_env.step(action)[1]
    assert raw_reward == scaled_reward * 10
def test_skipping_wrapper_and_reward_aggregation():
    """With sticky step skipping, each flat step must aggregate the reward over n_steps sub-steps."""
    obs_conversion = ObservationConversion()
    env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conversion],
    )

    n_steps = 3
    env = StepSkipWrapper.wrap(env, n_steps=n_steps, skip_mode='sticky')
    env.reset()
    for _ in range(4):
        action = env.action_space.sample()
        _, reward, _, _ = env.step(action)
        # Each of the n_steps skipped sub-steps contributes a reward of 10.
        assert reward == n_steps * 10
def build_dummy_maze_env_with_structured_core_env() -> DummyEnvironment:
    """Build a DummyEnvironment on top of a structured dummy core env.

    :return: Instance of a DummyEnvironment.
    """
    obs_conversion = ObservationConversion()
    return DummyEnvironment(
        core_env=DummyStructuredCoreEnvironment(obs_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conversion],
    )
def test_sorted_action_spaces_wrapper():
    """The sorting wrapper must order the action-space keys alphabetically."""
    obs_conversion = ObservationConversion()
    env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conversion],
    )
    env = AddActionWrapper.wrap(env)
    env = SortedSpacesWrapper.wrap(env)

    action_keys = list(env.action_space.spaces.keys())
    assert action_keys == sorted(action_keys)
def build_dummy_structured_environment() -> DummyStructuredEnvironment:
    """Build a DummyStructuredEnvironment wrapping a dummy Maze env.

    :return: Instance of a DummyStructuredEnvironment.
    """
    obs_conversion = PreProcessingObservationConversion()
    inner_env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conversion],
    )
    return DummyStructuredEnvironment(maze_env=inner_env)
def test_return_normalization_wrapper():
    """Return-normalized rewards must come back as plain scalar floats."""
    obs_conversion = ObservationConversion()
    env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conversion],
    )
    env = ReturnNormalizationRewardWrapper(env, gamma=0.99, epsilon=1e-8)
    env.reset()

    reward = env.step(env.action_space.sample())[1]
    assert isinstance(reward, float)
    # A plain scalar float must not carry an array 'shape' attribute.
    assert not hasattr(reward, 'shape')
def _build_env():
    """Build a dummy env wrapped in the full test wrapper stack
    (action/observation/reward wrappers, time limit, stats logging, trajectory recording)."""
    env = DummyEnvironment(
        core_env=DummyCoreEnvironment(gym.spaces.Discrete(10)),
        action_conversion=[{"_target_": DoubleActionConversion}],
        observation_conversion=[{"_target_": DoubleObservationConversion}],
    )
    # Apply the wrappers in the same order as a manual chain of .wrap() calls.
    for wrapper_cls in (_DummyActionWrapper, _DummyObservationWrapper, _DummyRewardWrapper,
                        TimeLimitWrapper, LogStatsWrapper, TrajectoryRecordingWrapper):
        env = wrapper_cls.wrap(env)
    return env
def test_records_once_per_maze_step_in_multistep_envs():
    """In multi-step envs, trajectory should be recorded once per Maze env step (not in each sub-step)."""
    obs_conversion = ObservationConversion()
    maze_env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conversion],
    )
    env = DummyStructuredEnvironment(maze_env)

    class TestWriter(TrajectoryWriter):
        """Mock writer counting recorded episodes and step records."""

        def __init__(self):
            self.episode_count = 0
            self.step_count = 0

        def write(self, episode_record: StateTrajectoryRecord):
            """Accumulate episode and step-record counts."""
            self.episode_count += 1
            self.step_count += len(episode_record.step_records)

    writer = TestWriter()
    TrajectoryWriterRegistry.writers = []  # Ensure there is no other writer
    TrajectoryWriterRegistry.register_writer(writer)

    env = TrajectoryRecordingWrapper.wrap(env)
    for _ in range(5):
        env.reset()
        for _ in range(10):
            env.step(env.action_space.sample())

    # A final env reset is required to flush the last episode record.
    env.reset()
    env.close()

    # The step count should correspond to core env steps (disregarding sub-steps),
    # i.e., 5 flat steps per 10 structured sub-steps + 1 final state for each episode.
    assert writer.step_count == 5 * (5 + 1)
    assert writer.episode_count == 5
def test_logs_custom_env_time():
    """Event logs must report the env time provided by a TimeEnvMixin core env."""

    class CustomTimedDummyEnv(DummyCoreEnvironment, TimeEnvMixin):
        """Dummy core env whose env time starts at 1337 after reset."""

        def reset(self):
            """Reset and move the env clock to 1337."""
            obs = super().reset()
            self.context.step_id = 1337
            return obs

    obs_conversion = DummyObservationConversion()
    env = DummyEnvironment(
        core_env=CustomTimedDummyEnv(observation_space=obs_conversion.space()),
        action_conversion=[DummyActionConversion()],
        observation_conversion=[obs_conversion],
    )

    class TestWriter(LogEventsWriter):
        """Mock writer checking the logged env times."""

        def __init__(self):
            self.episode_count = 0
            self.step_count = 0

        def write(self, episode_record: EpisodeEventLog):
            """Count steps/episodes and verify env time equals 1337 + step offset."""
            self.episode_count += 1
            self.step_count += len(episode_record.step_event_logs)
            for step_id, step_event_log in enumerate(episode_record.step_event_logs):
                assert step_event_log.env_time == 1337 + step_id

    writer = TestWriter()
    _run_rollout_loop(env=env, n_episodes=5, n_steps_per_episode=10, writer=writer)
    assert writer.step_count == 5 * 10
    assert writer.episode_count == 5
def test_wrap_method():
    """Tests the .wrap() classmethod, with and without the required wrapper arguments."""
    default_config: dict = load_env_config(dummy_wrappers_module, "dummy_env_config_with_dummy_wrappers.yml")
    env_kwargs: dict = default_config['env']
    env_kwargs["core_env"] = {
        "_target_": DummyCoreEnvironment,
        "observation_space": ObservationConversion().space(),
    }
    env = DummyEnvironment(**env_kwargs)

    env_a: DummyWrapperA = DummyWrapperA.wrap(env, arg_a=1)
    assert isinstance(env_a, DummyWrapperA)

    # Omitting the wrapper's required arguments must raise a TypeError.
    try:
        DummyWrapperB.wrap(env)
        raise Exception("Wrapping shouldn't work without specifying the needed arguments.")
    except TypeError:
        pass

    env_b: DummyWrapperB = DummyWrapperB.wrap(env, arg_b=2, arg_c=3)
    assert isinstance(env_b, DummyWrapperB)
def test_records_once_per_maze_step_in_multistep_envs():
    """In multi-step envs, events should be recorded once per Maze env step (not in each sub-step)."""

    class _SubStepEvents(ABC):
        @define_epoch_stats(sum)
        @define_episode_stats(len)
        @define_step_stats(len)
        def sub_step_event(self):
            """Dispatched in each sub step."""

    class _CoreEnvEvents(ABC):
        @define_epoch_stats(sum)
        @define_episode_stats(len)
        @define_step_stats(len)
        def core_env_step_event(self):
            """Dispatched when core env steps (only once per step)."""

    class EventDummyEnv(DummyCoreEnvironment):
        """Dummy core env dispatching one core-env event in every step."""

        def __init__(self, observation_space: gym.spaces.space.Space):
            super().__init__(observation_space)
            self.dummy_events = self.pubsub.create_event_topic(_CoreEnvEvents)

        def step(self, maze_action: Dict) -> Tuple[Dict[str, np.ndarray], float, bool, Optional[Dict]]:
            """Dispatch the core-env step event, then step normally."""
            self.dummy_events.core_env_step_event()
            return super().step(maze_action)

    class DummyMultiStepEnv(DummyStructuredEnvironment):
        """Simple two-step env: the underlying env is stepped only in the second sub-step."""

        def __init__(self, maze_env):
            super().__init__(maze_env)
            self.dummy_events = self.pubsub.create_event_topic(_SubStepEvents)

        def _action0(self, action) -> Tuple[Dict, float, bool, Optional[Dict[str, np.ndarray]]]:
            self.dummy_events.sub_step_event()
            return {}, 0, False, None

        def _action1(self, action) -> Tuple[Dict, float, bool, Optional[Dict[str, np.ndarray]]]:
            self.dummy_events.sub_step_event()
            return self.maze_env.step(action)

    class TestWriter(LogEventsWriter):
        """Testing writer keeping the last episode event record."""

        def __init__(self):
            self.episode_record = None

        def write(self, episode_record: EpisodeEventLog):
            """Store the record."""
            self.episode_record = episode_record

    # Build the env hierarchy.
    obs_conversion = ObservationConversion()
    maze_env = DummyEnvironment(
        core_env=EventDummyEnv(obs_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conversion],
    )
    env = DummyMultiStepEnv(maze_env)

    # Run the rollout.
    writer = TestWriter()
    _run_rollout_loop(env=env, n_episodes=1, n_steps_per_episode=10, writer=writer)

    # One core env step event and two sub-step events should be recorded per completed
    # Maze step (which closes on every second structured sub-step).
    assert writer.episode_record is not None
    for step_id, step_event_log in enumerate(writer.episode_record.step_event_logs):
        if (step_id + 1) % 2 == 0:
            assert len(step_event_log.events.query_events(_CoreEnvEvents.core_env_step_event)) == 1
            assert len(step_event_log.events.query_events(_SubStepEvents.sub_step_event)) == 2
        assert step_event_log.env_time == step_id
def test_logs_events():
    """Events and KPIs must be logged for every step of a rollout."""

    class CustomDummyKPICalculator(KpiCalculator):
        """Dummy KPI calculator for the dummy environment."""

        def calculate_kpis(self, episode_event_log: EpisodeEventLog,
                           last_maze_state: MazeStateType) -> Dict[str, float]:
            """Return a single random dummy KPI."""
            return {"dummy_kpi": random.random()}

    class CustomDummyRewardAggregator(RewardAggregator):
        """Dummy reward aggregator additionally subscribed to BaseEnvEvents."""

        def get_interfaces(self) -> List[Type[ABC]]:
            """Return the event interfaces this aggregator is subscribed to."""
            additional_interfaces: List[Type[ABC]] = [BaseEnvEvents]
            return additional_interfaces + super().get_interfaces()

    class CustomDummyCoreEnv(DummyCoreEnvironment):
        """Dummy core env wired up with the custom aggregator and KPI calculator."""

        def __init__(self, observation_space):
            super().__init__(observation_space)
            self.reward_aggregator = CustomDummyRewardAggregator()
            self.pubsub: Pubsub = Pubsub(self.context.event_service)
            self.pubsub.register_subscriber(self.reward_aggregator)
            self.base_event_publisher = self.pubsub.create_event_topic(BaseEnvEvents)
            self.kpi_calculator = CustomDummyKPICalculator()

        def get_kpi_calculator(self) -> CustomDummyKPICalculator:
            """KPIs are supported."""
            return self.kpi_calculator

    class TestWriter(LogEventsWriter):
        """Mock writer checking the logged events."""

        def __init__(self):
            self.episode_count = 0
            self.step_count = 0

        def write(self, episode_record: EpisodeEventLog):
            """Count steps/episodes and check that some events were logged in each step."""
            self.episode_count += 1
            self.step_count += len(episode_record.step_event_logs)
            for step_id, step_event_log in enumerate(episode_record.step_event_logs):
                assert step_id == step_event_log.env_time
                assert len(step_event_log.events) > 0

            # KPIs should be a part of the last step record only.
            last_step_event_log = episode_record.step_event_logs[-1]
            kpi_count_in_last_step = len(list(last_step_event_log.events.query_events(BaseEnvEvents.kpi)))
            assert kpi_count_in_last_step != 0
            assert kpi_count_in_last_step == len(list(episode_record.query_events(BaseEnvEvents.kpi)))

    obs_conversion = ObservationConversion()
    writer = TestWriter()
    _run_rollout_loop(
        env=DummyEnvironment(
            core_env=CustomDummyCoreEnv(obs_conversion.space()),
            action_conversion=[DictActionConversion()],
            observation_conversion=[obs_conversion]),
        n_episodes=5, n_steps_per_episode=10, writer=writer)

    assert writer.episode_count == 5
    assert writer.step_count == 5 * 10
def test_records_maze_states_and_actions():
    """Trajectory recording must capture maze states, actions, serializable components
    and the renderer, and the recorded states must match the observed ones."""

    class CustomDummyRewardAggregator(RewardAggregator):
        """Customized dummy reward aggregator subscribed to BaseEnvEvents."""

        def get_interfaces(self) -> List[Type[ABC]]:
            """Return events class is subscribed to."""
            additional_interfaces: List[Type[ABC]] = [BaseEnvEvents]
            parent_interfaces = super().get_interfaces()
            return additional_interfaces + parent_interfaces

    class CustomDummyCoreEnv(DummyCoreEnvironment):
        """Customized dummy core env with serializable components that regenerates state only in step."""

        def __init__(self, observation_space):
            super().__init__(observation_space)
            self.reward_aggregator = CustomDummyRewardAggregator()
            self.maze_state = self.observation_space.sample()
            self.pubsub: Pubsub = Pubsub(self.context.event_service)
            self.pubsub.register_subscriber(self.reward_aggregator)
            self.base_event_publisher = self.pubsub.create_event_topic(BaseEnvEvents)
            self.renderer = DummyRenderer()

        def get_renderer(self) -> DummyRenderer:
            """Return the DummyRenderer instance."""
            return self.renderer

        def step(self, maze_action):
            """Regenerate the state, dispatch a reward event, and step the environment."""
            self.maze_state = self.observation_space.sample()
            self.base_event_publisher.reward(10)
            return super().step(maze_action)

        def get_maze_state(self):
            """Return the current state."""
            return self.maze_state

        def get_serializable_components(self) -> Dict[str, Any]:
            """Return a minimal dict with components to serialize."""
            return {"value": 0}

    class TestWriter(TrajectoryWriter):
        """Mock writer checking the recorded data."""

        def __init__(self):
            self.episode_count = 0
            self.step_count = 0
            self.episode_records = []

        def write(self, episode_record: StateTrajectoryRecord):
            """Count steps and episodes & check instance types."""
            self.episode_records.append(episode_record)
            self.episode_count += 1
            self.step_count += len(episode_record.step_records)

            for step_record in episode_record.step_records[:-1]:
                assert isinstance(step_record.maze_state, dict)
                assert isinstance(step_record.maze_action, dict)
                assert step_record.serializable_components != {}
                assert len(step_record.step_event_log.events) > 0

            # The final record carries only the terminal state, no action.
            final_state_record = episode_record.step_records[-1]
            assert isinstance(final_state_record.maze_state, dict)
            assert final_state_record.maze_action is None
            assert final_state_record.serializable_components != {}
            assert isinstance(episode_record.renderer, Renderer)

    writer = TestWriter()
    TrajectoryWriterRegistry.writers = []  # Ensure there is no other writer
    TrajectoryWriterRegistry.register_writer(writer)

    observation_conversion = ObservationConversion()
    env = DummyEnvironment(
        core_env=CustomDummyCoreEnv(observation_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[observation_conversion]
    )
    env = TrajectoryRecordingWrapper.wrap(env)

    policy = DummyGreedyPolicy()
    states = []  # Observe changes in states over time.

    for _ in range(5):
        obs = env.reset()
        for _ in range(10):
            maze_state = env.get_maze_state()
            states.append(deepcopy(maze_state))
            obs, _, _, _ = env.step(
                policy.compute_action(observation=obs, maze_state=maze_state, deterministic=True))

    # final env reset required to flush the last episode record
    env.reset()

    assert writer.step_count == 5 * (10 + 1)  # Count also the recorded final state
    assert writer.episode_count == 5

    # Compare the states observed in the first episode against the trajectory records.
    # BUGFIX: the original wrapped a generator expression in np.all(), which evaluates
    # the truthiness of the generator object itself and is therefore always True —
    # the comparison never actually ran. Assert each key's equality explicitly instead.
    for step_id in range(10):
        recorded_state = writer.episode_records[0].step_records[step_id].maze_state
        for key in env.observation_conversion.space().spaces:
            assert np.all(states[step_id][key] == recorded_state[key])
def _env_factory():
    """Build a dummy env with doubling action/observation conversions."""
    return DummyEnvironment(
        core_env=DummyCoreEnvironment(gym.spaces.Discrete(10)),
        action_conversion=[DoubleActionConversion()],
        observation_conversion=[DoubleObservationConversion()],
    )