Example #1
0
def build_dummy_maze_env_with_structured_core_env() -> DummyEnvironment:
    """
    Instantiates the DummyEnvironment.

    :return: Instance of a DummyEnvironment
    """
    obs_conv = ObservationConversion()
    core_env = DummyStructuredCoreEnvironment(obs_conv.space())

    return DummyEnvironment(
        core_env=core_env,
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conv],
    )
Example #2
0
def test_return_normalization_wrapper():
    """ Unit tests """
    obs_conv = ObservationConversion()

    base_env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conv.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conv])

    wrapped = ReturnNormalizationRewardWrapper(base_env, gamma=0.99, epsilon=1e-8)
    wrapped.reset()
    reward = wrapped.step(wrapped.action_space.sample())[1]

    # The normalized reward must come back as a plain Python float,
    # not a numpy scalar/array (which would carry a 'shape' attribute).
    assert isinstance(reward, float)
    assert not hasattr(reward, 'shape')
def build_dummy_structured_environment() -> DummyStructuredEnvironment:
    """
    Instantiates the DummyStructuredEnvironment.

    :return: Instance of a DummyStructuredEnvironment
    """
    obs_conv = PreProcessingObservationConversion()

    return DummyStructuredEnvironment(
        maze_env=DummyEnvironment(
            core_env=DummyCoreEnvironment(obs_conv.space()),
            action_conversion=[DictActionConversion()],
            observation_conversion=[obs_conv],
        )
    )
def test_sorted_action_spaces_wrapper():
    """ Unit tests """
    obs_conv = ObservationConversion()

    env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conv.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conv])
    env = AddActionWrapper.wrap(env)
    env = SortedSpacesWrapper.wrap(env)

    # After wrapping, the dict action space keys must come out sorted.
    action_keys = list(env.action_space.spaces.keys())
    assert action_keys == sorted(action_keys)
def test_reward_clipping_wrapper():
    """Checks that RewardClippingWrapper clips the step reward into [min_val, max_val]."""
    observation_conversion = ObservationConversion()

    env = DummyEnvironment(core_env=DummyCoreEnvironment(
        observation_conversion.space()),
                           action_conversion=[DictActionConversion()],
                           observation_conversion=[observation_conversion])

    env.reset()
    action = env.action_space.sample()

    wrapped_env = RewardClippingWrapper(env, min_val=-0.1, max_val=0.1)
    # Seed right before stepping so the (random) dummy reward is reproducible.
    # (The original code also seeded before wrapping, but nothing consumed
    # randomness between the two seed calls, so that call was redundant.)
    np.random.seed(1234)
    wrapped_reward = wrapped_env.step(action)[1]

    assert -0.1 <= wrapped_reward <= 0.1
def test_reward_scaling_wrapper():
    """Checks that RewardScalingWrapper scales the step reward by the given factor."""
    import math

    observation_conversion = ObservationConversion()

    env = DummyEnvironment(core_env=DummyCoreEnvironment(
        observation_conversion.space()),
                           action_conversion=[DictActionConversion()],
                           observation_conversion=[observation_conversion])

    env.reset()
    action = env.action_space.sample()

    # Seed so the un-wrapped and wrapped steps draw identical random rewards.
    np.random.seed(1234)
    original_reward = env.step(action)[1]

    wrapped_env = RewardScalingWrapper(env, scale=0.1)
    np.random.seed(1234)
    wrapped_reward = wrapped_env.step(action)[1]

    # reward * 0.1 * 10 is not guaranteed to round-trip exactly under
    # IEEE-754 arithmetic, so compare with a tolerance instead of `==`.
    assert math.isclose(original_reward, wrapped_reward * 10, rel_tol=1e-9)
def test_sorted_observation_spaces_wrapper():
    """ Unit tests """
    obs_conv = ObservationConversion()

    env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conv.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conv])
    env = AddObservationWrapper.wrap(env)
    env = SortedSpacesWrapper.wrap(env)

    # After wrapping, the dict observation space keys must come out sorted.
    obs_keys = list(env.observation_space.spaces.keys())
    assert obs_keys == sorted(obs_keys)

    # Conversion of maze states/actions to space dicts must still work.
    _, _ = env.get_observation_and_action_dicts({},
                                                env.last_maze_action,
                                                first_step_in_episode=True)
def test_skipping_wrapper_and_reward_aggregation():
    """ Step skipping unit test """
    obs_conv = ObservationConversion()

    env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conv.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conv]
    )

    skip_count = 3
    env = StepSkipWrapper.wrap(env, n_steps=skip_count, skip_mode='sticky')

    env.reset()
    for _ in range(4):
        sampled_action = env.action_space.sample()
        _, reward, _, _ = env.step(sampled_action)

        # Each skipped sub-step contributes a reward of 10; the wrapper
        # aggregates them into a single return value.
        assert reward == skip_count * 10
def test_records_once_per_maze_step_in_multistep_envs():
    """In multi-step envs, trajectory should be recorded once per Maze env step (not in each sub-step)."""

    obs_conv = ObservationConversion()
    maze_env = DummyEnvironment(
        core_env=DummyCoreEnvironment(obs_conv.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conv]
    )
    env = DummyStructuredEnvironment(maze_env)

    class TestWriter(TrajectoryWriter):
        """Mock writer counting recorded episodes and steps."""

        def __init__(self):
            self.episode_count = 0
            self.step_count = 0

        def write(self, episode_record: StateTrajectoryRecord):
            """Accumulate the episode/step counters."""
            self.episode_count += 1
            self.step_count += len(episode_record.step_records)

    writer = TestWriter()
    TrajectoryWriterRegistry.writers = []  # Ensure there is no other writer
    TrajectoryWriterRegistry.register_writer(writer)

    env = TrajectoryRecordingWrapper.wrap(env)

    n_episodes = 5
    n_structured_steps = 10
    for _ in range(n_episodes):
        env.reset()
        for _ in range(n_structured_steps):
            env.step(env.action_space.sample())

    # The final reset flushes the last episode record before closing.
    env.reset()
    env.close()

    # Two sub-steps form one flat step, so 10 structured steps yield 5 flat
    # steps, plus one final-state record per episode.
    assert writer.step_count == n_episodes * (n_structured_steps // 2 + 1)
    assert writer.episode_count == n_episodes
def test_records_maze_states_and_actions():
    """Trajectory recording must capture maze states, maze actions, serializable
    components, per-step event logs, and the renderer for every episode."""

    class CustomDummyRewardAggregator(RewardAggregator):
        """Customized dummy reward aggregator subscribed to BaseEnvEvents."""

        def get_interfaces(self) -> List[Type[ABC]]:
            """
            Return events classes this aggregator is subscribed to.
            """
            additional_interfaces: List[Type[ABC]] = [BaseEnvEvents]
            parent_interfaces = super().get_interfaces()
            return additional_interfaces + parent_interfaces

    class CustomDummyCoreEnv(DummyCoreEnvironment):
        """
        Customized dummy core env with serializable components that regenerates state only in step.
        """

        def __init__(self, observation_space):
            super().__init__(observation_space)
            self.reward_aggregator = CustomDummyRewardAggregator()
            self.maze_state = self.observation_space.sample()
            self.pubsub: Pubsub = Pubsub(self.context.event_service)
            self.pubsub.register_subscriber(self.reward_aggregator)
            self.base_event_publisher = self.pubsub.create_event_topic(BaseEnvEvents)
            self.renderer = DummyRenderer()

        def get_renderer(self) -> DummyRenderer:
            """
            Returns DummyRenderer.
            :return: DummyRenderer.
            """
            return self.renderer

        def step(self, maze_action):
            """
            Steps through the environment: re-samples the state and dispatches
            a reward event so every step record carries at least one event.
            """
            self.maze_state = self.observation_space.sample()
            self.base_event_publisher.reward(10)
            return super().step(maze_action)

        def get_maze_state(self):
            """
            Returns current state.
            """
            return self.maze_state

        def get_serializable_components(self) -> Dict[str, Any]:
            """
            Returns minimal dict. with components to serialize.
            """
            return {"value": 0}

    class TestWriter(TrajectoryWriter):
        """Mock writer checking the recorded data"""

        def __init__(self):
            self.episode_count = 0
            self.step_count = 0
            self.episode_records = []

        def write(self, episode_record: StateTrajectoryRecord):
            """Count steps and episodes & check instance types"""
            self.episode_records.append(episode_record)
            self.episode_count += 1
            self.step_count += len(episode_record.step_records)

            for step_record in episode_record.step_records[:-1]:
                assert isinstance(step_record.maze_state, dict)
                assert isinstance(step_record.maze_action, dict)
                assert step_record.serializable_components != {}
                assert len(step_record.step_event_log.events) > 0

            # The final record holds only the terminal state -- no action.
            final_state_record = episode_record.step_records[-1]
            assert isinstance(final_state_record.maze_state, dict)
            assert final_state_record.maze_action is None
            assert final_state_record.serializable_components != {}

            assert isinstance(episode_record.renderer, Renderer)

    writer = TestWriter()
    TrajectoryWriterRegistry.writers = []  # Ensure there is no other writer
    TrajectoryWriterRegistry.register_writer(writer)

    observation_conversion = ObservationConversion()
    env = DummyEnvironment(
        core_env=CustomDummyCoreEnv(observation_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[observation_conversion]
    )
    env = TrajectoryRecordingWrapper.wrap(env)

    policy = DummyGreedyPolicy()
    states = []  # Observe changes in states over time.

    for _ in range(5):
        obs = env.reset()
        for _ in range(10):
            maze_state = env.get_maze_state()
            states.append(deepcopy(maze_state))
            obs, _, _, _ = env.step(policy.compute_action(observation=obs, maze_state=maze_state, deterministic=True))

    # final env reset required (flushes the last episode record)
    env.reset()

    assert writer.step_count == 5 * (10 + 1)  # Count also the recorded final state
    assert writer.episode_count == 5

    # Compare the states recorded in the first episode against the states
    # observed during the rollout. The original code passed a *generator
    # expression* to np.all, which is always truthy and made this assertion
    # vacuous -- assert per key instead.
    for step_id in range(10):
        recorded_state = writer.episode_records[0].step_records[step_id].maze_state
        for key in env.observation_conversion.space().spaces:
            assert np.all(states[step_id][key] == recorded_state[key])
Example #11
0
def test_raises_exception_on_invalid_type():
    """Factory must reject config objects that do not match the declared base type."""
    with pytest.raises(AssertionError):
        factory = Factory(base_type=DummyObservationConversion)
        factory.instantiate(config=DictActionConversion())
Example #12
0
def test_logs_events():
    """Event logging: every step logs events, and KPI events appear only in the last step record."""

    class CustomDummyKPICalculator(KpiCalculator):
        """
        Dummy KPIs for dummy environment.
        """
        def calculate_kpis(self, episode_event_log: EpisodeEventLog,
                           last_maze_state: MazeStateType) -> Dict[str, float]:
            """
            Returns a single random dummy KPI.
            """
            return {"dummy_kpi": random.random()}

    class CustomDummyRewardAggregator(RewardAggregator):
        """
        Customized dummy reward aggregator subscribed to BaseEnvEvents.
        """
        def get_interfaces(self) -> List[Type[ABC]]:
            """
            Return the event interfaces this aggregator listens to.
            """
            return [BaseEnvEvents] + super().get_interfaces()

    class CustomDummyCoreEnv(DummyCoreEnvironment):
        """
        Customized dummy core env wired up with the custom aggregator and KPI calculator.
        """
        def __init__(self, observation_space):
            super().__init__(observation_space)
            self.reward_aggregator = CustomDummyRewardAggregator()
            self.pubsub: Pubsub = Pubsub(self.context.event_service)
            self.pubsub.register_subscriber(self.reward_aggregator)
            self.base_event_publisher = self.pubsub.create_event_topic(BaseEnvEvents)
            self.kpi_calculator = CustomDummyKPICalculator()

        def get_kpi_calculator(self) -> CustomDummyKPICalculator:
            """KPIs are supported."""
            return self.kpi_calculator

    class TestWriter(LogEventsWriter):
        """Mock writer checking the logged events"""
        def __init__(self):
            self.episode_count = 0
            self.step_count = 0

        def write(self, episode_record: EpisodeEventLog):
            """Count steps and episodes & check that some events were logged in each step"""
            self.episode_count += 1
            self.step_count += len(episode_record.step_event_logs)

            for step_id, step_event_log in enumerate(episode_record.step_event_logs):
                assert step_id == step_event_log.env_time
                assert len(step_event_log.events) > 0

            # KPI events must show up in -- and only in -- the last step record.
            final_step_log = episode_record.step_event_logs[-1]
            kpis_in_final_step = list(
                final_step_log.events.query_events(BaseEnvEvents.kpi))
            assert len(kpis_in_final_step) != 0
            assert len(kpis_in_final_step) == len(
                list(episode_record.query_events(BaseEnvEvents.kpi)))

    obs_conv = ObservationConversion()
    writer = TestWriter()
    env = DummyEnvironment(
        core_env=CustomDummyCoreEnv(obs_conv.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[obs_conv])
    _run_rollout_loop(env=env,
                      n_episodes=5,
                      n_steps_per_episode=10,
                      writer=writer)

    assert writer.episode_count == 5
    assert writer.step_count == 5 * 10
Example #13
0
def test_records_once_per_maze_step_in_multistep_envs():
    """In multi-step envs, events should be recorded once per Maze env step (not in each sub-step)."""
    # NOTE(review): another test of the same name appears elsewhere in the code
    # base; if both ever end up in the same module, the later definition would
    # shadow the earlier one -- confirm they live in separate files.

    # Event interface dispatched by each structured sub-step.
    class _SubStepEvents(ABC):
        @define_epoch_stats(sum)
        @define_episode_stats(len)
        @define_step_stats(len)
        def sub_step_event(self):
            """Dispatched in each sub step."""

    # Event interface dispatched by the flat core env step.
    class _CoreEnvEvents(ABC):
        @define_epoch_stats(sum)
        @define_episode_stats(len)
        @define_step_stats(len)
        def core_env_step_event(self):
            """Dispatched when core env steps (only once per step)."""

    class EventDummyEnv(DummyCoreEnvironment):
        """Dummy core env, dispatches one step event in every step."""
        def __init__(self, observation_space: gym.spaces.space.Space):
            super().__init__(observation_space)
            self.dummy_events = self.pubsub.create_event_topic(_CoreEnvEvents)

        def step(
            self, maze_action: Dict
        ) -> Tuple[Dict[str, np.ndarray], float, bool, Optional[Dict]]:
            """Dispatch the step event, then delegate to the parent step."""
            self.dummy_events.core_env_step_event()
            return super().step(maze_action)

    class DummyMultiStepEnv(DummyStructuredEnvironment):
        """Simulates simple two-step env. The underlying env is stepped only in the second step."""
        def __init__(self, maze_env):
            super().__init__(maze_env)
            self.dummy_events = self.pubsub.create_event_topic(_SubStepEvents)

        def _action0(
            self, action
        ) -> Tuple[Dict, float, bool, Optional[Dict[str, np.ndarray]]]:
            # First sub-step: dispatches the sub-step event only; the wrapped
            # maze env is NOT stepped here.
            self.dummy_events.sub_step_event()
            return {}, 0, False, None

        def _action1(
            self, action
        ) -> Tuple[Dict, float, bool, Optional[Dict[str, np.ndarray]]]:
            # Second sub-step: dispatches the event AND steps the wrapped env.
            self.dummy_events.sub_step_event()
            return self.maze_env.step(action)

    class TestWriter(LogEventsWriter):
        """Testing writer. Keeps the episode event record."""
        def __init__(self):
            # Holds the last EpisodeEventLog passed to write().
            self.episode_record = None

        def write(self, episode_record: EpisodeEventLog):
            """Store the record"""
            self.episode_record = episode_record

    # Init the env hierarchy: core env -> maze env -> two-sub-step structured env.
    observation_conversion = ObservationConversion()
    maze_env = DummyEnvironment(
        core_env=EventDummyEnv(observation_conversion.space()),
        action_conversion=[DictActionConversion()],
        observation_conversion=[observation_conversion])
    env = DummyMultiStepEnv(maze_env)

    # Run the rollout
    writer = TestWriter()
    _run_rollout_loop(env=env,
                      n_episodes=1,
                      n_steps_per_episode=10,
                      writer=writer)

    # There should be one core env step event and two substep events recorded in every step.
    assert writer.episode_record is not None
    for step_id, step_event_log in enumerate(
            writer.episode_record.step_event_logs):
        # The core env is only stepped in the second sub-step, i.e. in every
        # even-numbered (1-based) structured step.
        if (step_id + 1) % 2 == 0:
            assert len(
                step_event_log.events.query_events(
                    _CoreEnvEvents.core_env_step_event)) == 1
        assert len(
            step_event_log.events.query_events(
                _SubStepEvents.sub_step_event)) == 2
        assert step_event_log.env_time == step_id