def test_configures_from_hydra():
    """Tests that an agent deployment can be assembled from a Hydra rollout config."""
    # Note: Needs to be `conf_rollout` so that the policy config is present as well
    cfg = read_hydra_config(config_module="maze.conf", config_name="conf_rollout", env="gym_env")

    agent_deployment = AgentDeployment(
        policy=cfg.policy,
        env=cfg.env,
        wrappers=cfg.wrappers
    )

    external_env = EnvFactory(cfg.env, wrappers={})().core_env

    maze_state = external_env.reset()
    reward, done, info = 0, False, {}

    for i in range(10):
        maze_action = agent_deployment.act(maze_state, reward, done, info)
        maze_state, reward, done, info = external_env.step(maze_action)

    agent_deployment.close(maze_state, reward, done, info)
    assert agent_deployment.env.get_stats_value(
        BaseEnvEvents.reward,
        LogStatsLevel.EPOCH,
        name="total_step_count"
    ) == 10


def test_records_stats():
    """Tests that stats are recorded when stepping an env through an agent deployment."""
    env = LogStatsWrapper.wrap(build_dummy_maze_env())
    agent_deployment = AgentDeployment(
        policy=DummyGreedyPolicy(),
        env=env
    )

    # Step the environment manually here and query the agent deployment for maze actions
    test_core_env = build_dummy_maze_env().core_env
    maze_state = test_core_env.reset()
    reward, done, info = 0, False, {}

    for i in range(5):
        maze_action = agent_deployment.act(maze_state, reward, done, info)
        maze_state, reward, done, info = test_core_env.step(maze_action)

    agent_deployment.close(maze_state, reward, done, info)
    assert env.get_stats_value(
        RewardEvents.reward_original,
        LogStatsLevel.EPOCH,
        name="total_step_count"
    ) == 5

    assert env.get_stats_value(
        BaseEnvEvents.reward,
        LogStatsLevel.EPOCH,
        name="total_step_count"
    ) == 5


def test_supports_multi_step_wrappers():
    """Tests that agent deployment supports structured (multi-step) environments."""
    env = build_dummy_structured_env()
    env = LogStatsWrapper.wrap(env)
    agent_deployment = AgentDeployment(
        policy=DummyGreedyPolicy(),
        env=env
    )

    # Step the environment manually here and query the agent deployment for maze actions
    test_core_env = build_dummy_structured_env().core_env
    maze_state = test_core_env.reset()
    reward, done, info = 0, False, {}

    for i in range(4):
        maze_action = agent_deployment.act(maze_state, reward, done, info)
        maze_state, reward, done, info = test_core_env.step(maze_action)

    agent_deployment.close(maze_state, reward, done, info)
    assert env.get_stats_value(
        BaseEnvEvents.reward,
        LogStatsLevel.EPOCH,
        name="total_step_count"
    ) == 4  # Step count is still 4 even with multiple sub-steps, as it is detected based on env time

    assert env.get_stats_value(
        RewardEvents.reward_original,
        LogStatsLevel.EPOCH,
        name="total_step_count"
    ) == 4


def test_works_with_gym_maze_envs():
    """Tests agent deployment with a Gym-backed Maze env (CartPole)."""
    env = GymMazeEnv("CartPole-v0")
    policy = RandomPolicy(action_spaces_dict=env.action_spaces_dict)

    agent_deployment = AgentDeployment(
        policy=policy,
        env=env
    )

    external_env = gym.make("CartPole-v0")

    maze_state = external_env.reset()
    reward, done, info = 0, False, {}

    for i in range(10):
        maze_action = agent_deployment.act(maze_state, reward, done, info)
        maze_state, reward, done, info = external_env.step(maze_action)

    agent_deployment.close(maze_state, reward, done, info)


def test_supports_trajectory_recording_wrapper():
    """
    Tests whether the agent deployment supports trajectory recording wrappers.
    """

    class TestWriter(TrajectoryWriter):
        """Mock writer for checking that trajectory recording goes through."""

        def __init__(self):
            self.step_count = 0

        def write(self, episode_record: StateTrajectoryRecord):
            """Count recorded steps"""
            self.step_count += len(episode_record.step_records)
            assert episode_record.renderer is not None

    step_count = 10

    writer = TestWriter()
    TrajectoryWriterRegistry.writers = []  # Ensure there is no other writer
    TrajectoryWriterRegistry.register_writer(writer)

    agent_deployment = AgentDeployment(
        policy=DummyGreedyPolicy(),
        env=TrajectoryRecordingWrapper.wrap(build_dummy_maze_env()),
    )

    # Step the environment manually here and query the agent deployment for maze actions
    test_core_env = build_dummy_maze_env().core_env
    maze_state = test_core_env.reset()
    reward, done, info = None, None, None
    for i in range(step_count):
        maze_action = agent_deployment.act(maze_state, reward, done, info)
        maze_state, reward, done, info = test_core_env.step(maze_action)

    # Rollout needs to be finished to notify the wrappers
    agent_deployment.close(maze_state, reward, done, info)

    assert writer.step_count == step_count + 1  # count terminal state as well


# Standalone usage example: deploying a policy against an external (non-Maze) environment
import gym

from maze.core.agent.random_policy import RandomPolicy
from maze.core.agent_deployment.agent_deployment import AgentDeployment
from maze.core.wrappers.maze_gym_env_wrapper import GymMazeEnv

env = GymMazeEnv("CartPole-v0")
policy = RandomPolicy(action_spaces_dict=env.action_spaces_dict)

agent_deployment = AgentDeployment(policy=policy, env=env)

# Simulate an external production environment that does not use Maze
external_env = gym.make("CartPole-v0")

maze_state = external_env.reset()
reward, done, info = 0, False, {}

for i in range(10):
    # Query the agent deployment for maze action, then step the environment with it
    maze_action = agent_deployment.act(maze_state, reward, done, info)
    maze_state, reward, done, info = external_env.step(maze_action)

agent_deployment.close(maze_state, reward, done, info)
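

# --- Illustrative helper (not part of the Maze API) ---------------------------------
# A minimal sketch that wraps the act/step/close pattern from the example above into a
# reusable function. The name `run_deployment_loop` and its `n_steps` parameter are
# assumptions for illustration only; the sketch relies solely on the imports and call
# patterns already shown in this file.
def run_deployment_loop(agent_deployment: AgentDeployment, external_env, n_steps: int = 10):
    """Drive an external (non-Maze) environment through an agent deployment for `n_steps` steps."""
    maze_state = external_env.reset()
    reward, done, info = 0, False, {}

    for _ in range(n_steps):
        # Query the agent deployment for the next action, then step the external env with it
        maze_action = agent_deployment.act(maze_state, reward, done, info)
        maze_state, reward, done, info = external_env.step(maze_action)

    # Closing the deployment finishes the rollout so that wrappers
    # (stats, trajectory recording, ...) are notified
    agent_deployment.close(maze_state, reward, done, info)


# Example usage, mirroring the CartPole loop above:
deployment_env = GymMazeEnv("CartPole-v0")
run_deployment_loop(
    agent_deployment=AgentDeployment(
        policy=RandomPolicy(action_spaces_dict=deployment_env.action_spaces_dict),
        env=deployment_env,
    ),
    external_env=gym.make("CartPole-v0"),
)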


def test_writes_event_and_stats_logs():
    """Tests that event logs and statistics are written when rolling out through an agent deployment."""
    class TestEventsWriter(LogEventsWriter):
        """Test event writer for checking logged events."""

        def __init__(self):
            self.step_count = 0
            self.reward_events_count = 0

        def write(self, episode_record: EpisodeEventLog):
            """Check that we have some reward events as well as env-specific events."""

            self.step_count += len(episode_record.step_event_logs)
            self.reward_events_count += len(list(episode_record.query_events(BaseEnvEvents.reward)))

    class TestStatsWriter(LogStatsWriter):
        """Test stats writer for checking if stats get calculated."""

        def __init__(self):
            self.collected_stats_count = 0

        def write(self, path: str, step: int, stats: LogStats) -> None:
            """Count number of stats items received"""
            self.collected_stats_count += len(stats)

    step_count = 10

    # Event logging
    events_writer = TestEventsWriter()
    LogEventsWriterRegistry.writers = []  # Ensure there is no other writer
    LogEventsWriterRegistry.register_writer(events_writer)

    # Stats logging
    stats_writer = TestStatsWriter()
    register_log_stats_writer(stats_writer)

    agent_deployment = AgentDeployment(
        policy=DummyGreedyPolicy(),
        env=build_dummy_maze_env(),
        wrappers={LogStatsWrapper: {"logging_prefix": "test"}}
    )

    # Step the environment manually here and query the agent deployment for maze actions
    test_core_env = build_dummy_maze_env().core_env
    maze_state = test_core_env.reset()
    reward, done, info = None, None, None
    for i in range(step_count):
        maze_action = agent_deployment.act(maze_state, reward, done, info,
                                           events=list(test_core_env.get_step_events()))
        maze_state, reward, done, info = test_core_env.step(maze_action)
        test_core_env.context.increment_env_step()  # Ordinarily done by the maze env itself

    # Rollout needs to be finished to notify the wrappers
    agent_deployment.close(maze_state, reward, done, info, events=list(test_core_env.get_step_events()))

    # Event logging
    assert events_writer.step_count == step_count
    assert events_writer.reward_events_count == step_count

    # Stats logging
    assert stats_writer.collected_stats_count > 0