Example #1
def test_propagates_exceptions_to_main_thread():
    class FailingPolicy(DummyGreedyPolicy):
        """Mock policy, throws an error every time."""

        def compute_action(self,
                           observation: ObservationType,
                           maze_state: Optional[MazeStateType] = None,
                           env: Optional[BaseEnv] = None,
                           actor_id: ActorID = None,
                           deterministic: bool = False) -> ActionType:
            """Throw an error."""
            raise RuntimeError("Test error.")

        def compute_top_action_candidates(self, observation: ObservationType, num_candidates: Optional[int],
                                          maze_state: Optional[MazeStateType], env: Optional[BaseEnv],
                                          actor_id: ActorID = None) \
                -> Tuple[Sequence[ActionType], Sequence[float]]:
            """Not used"""

    agent_deployment = AgentDeployment(
        policy=FailingPolicy(),
        env=build_dummy_maze_env()
    )

    test_core_env = build_dummy_maze_env().core_env
    s = test_core_env.reset()  # Just get a valid state, the content is not really important
    with pytest.raises(RuntimeError) as e_info:
        agent_deployment.act(s, 0, False, {})
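
For orientation, a minimal external control loop around AgentDeployment might look like the sketch below. It only reuses the act/close calls and dummy-env helpers already shown in these examples (and the same module-level imports the tests assume); the fixed step count and variable names are illustrative. As the test above verifies, any exception raised inside the policy or env surfaces at the act() call in the caller's thread.

# Illustrative sketch (not part of the test suite): an external control loop around AgentDeployment.
agent_deployment = AgentDeployment(
    policy=DummyGreedyPolicy(),
    env=build_dummy_maze_env()
)
external_env = build_dummy_maze_env().core_env  # stands in for the external system producing states

maze_state = external_env.reset()
reward, done, info = None, None, None
for _ in range(10):
    # Exceptions raised inside the policy or env are re-raised here, in the caller's thread
    maze_action = agent_deployment.act(maze_state, reward, done, info)
    maze_state, reward, done, info = external_env.step(maze_action)

# Finish the rollout so that wrappers (stats, trajectory recording, ...) are notified
agent_deployment.close(maze_state, reward, done, info)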
Example #2
def test_steps_env_with_single_policy():
    agent_deployment = AgentDeployment(
        policy=DummyGreedyPolicy(),
        env=build_dummy_maze_env()
    )

    # Step the environment manually here and query the agent integration wrapper for maze_actions
    test_policy = DummyGreedyPolicy()
    test_env = build_dummy_maze_env()
    maze_state = test_env.reset()
    reward, done, info = None, None, None

    for i in range(10):
        maze_action = agent_deployment.act(maze_state, reward, done, info)

        # Compare with the expected maze_action on top of the env that we are stepping
        raw_expected_action = test_policy.compute_action(
            observation=test_env.observation_conversion.maze_to_space(maze_state),
            maze_state=maze_state, deterministic=True)
        expected_action = test_env.action_conversion.space_to_maze(raw_expected_action, maze_state)
        assert expected_action.keys() == maze_action.keys()
        # Note: np.all on a generator expression is always truthy, so compare per key explicitly
        assert all(np.all(expected_action[key] == maze_action[key]) for key in maze_action.keys())

        maze_state, reward, done, info = test_env.step(expected_action)
Example #3
def test_records_stats():
    env = LogStatsWrapper.wrap(build_dummy_maze_env())
    agent_deployment = AgentDeployment(
        policy=DummyGreedyPolicy(),
        env=env
    )

    # Step the environment manually here and query the agent integration wrapper for maze_actions
    test_core_env = build_dummy_maze_env().core_env
    maze_state = test_core_env.reset()
    reward, done, info = 0, False, {}

    for i in range(5):
        maze_action = agent_deployment.act(maze_state, reward, done, info)
        maze_state, reward, done, info = test_core_env.step(maze_action)

    agent_deployment.close(maze_state, reward, done, info)
    assert env.get_stats_value(
        RewardEvents.reward_original,
        LogStatsLevel.EPOCH,
        name="total_step_count"
    ) == 5

    assert env.get_stats_value(
        BaseEnvEvents.reward,
        LogStatsLevel.EPOCH,
        name="total_step_count"
    ) == 5
Example #4
def test_gets_maze_action_candidates():
    class StaticPolicy(DummyGreedyPolicy):
        """Mock policy, returns static action candidates (careful, always three of them)."""

        def compute_top_action_candidates(self, observation: ObservationType, num_candidates: Optional[int],
                                          maze_state: Optional[MazeStateType], env: Optional[BaseEnv],
                                          actor_id: ActorID = None) \
                -> Tuple[Sequence[ActionType], Sequence[float]]:
            """Return static action candidates"""

            return (
                [{"action_0_0": j, "action_1_0": j, "action_1_1": [j % 2] * 5} for j in range(3)],
                [0.95, 0.04, 0.01]
            )

    env = build_dummy_maze_env()
    core_env, act_conv, obs_conv = env.core_env, env.action_conversion, env.observation_conversion

    agent_deployment = AgentDeployment(
        policy=StaticPolicy(),
        env=build_dummy_maze_env(),
        num_candidates=3
    )

    test_core_env = build_dummy_maze_env().core_env
    maze_state = test_core_env.reset()  # Just get a valid state, the content is not really important
    for i in range(10):
        maze_action = agent_deployment.act(maze_state, 0, False, {})
        assert isinstance(maze_action, MazeActionCandidates)
        assert maze_action.candidates[0]["action_0_0"] == 0
        assert maze_action.candidates[1]["action_0_0"] == 1
        assert maze_action.candidates[2]["action_0_0"] == 2
        assert maze_action.probabilities == [0.95, 0.04, 0.01]
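
When num_candidates is set, downstream code would typically pick one of the returned candidates. A minimal sketch using only the candidates/probabilities attributes asserted above (the selection logic itself is illustrative, not part of the test):

# Illustrative: given maze_action as returned by agent_deployment.act(...) above,
# pick the most probable candidate.
best_idx = int(np.argmax(maze_action.probabilities))
best_maze_action = maze_action.candidates[best_idx]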
Example #5
def test_action_monitoring():
    """ Action logging unit test """

    # instantiate env
    env = build_dummy_maze_env()

    env = MazeEnvMonitoringWrapper.wrap(env,
                                        observation_logging=False,
                                        action_logging=True,
                                        reward_logging=False)
    env = LogStatsWrapper.wrap(env)  # for accessing events from previous steps
    env.reset()

    # test application of wrapper
    for ii in range(2):
        env.step(env.action_space.sample())

        action_events = env.get_last_step_events(query=[
            ActionEvents.discrete_action, ActionEvents.continuous_action,
            ActionEvents.multi_binary_action
        ])

        assert len(action_events) == 7
        for event in action_events:
            if event.attributes['name'] in [
                    'action_0_0', 'action_0_1_0', 'action_0_1_1', 'action_1_0'
            ]:
                assert event.interface_method == ActionEvents.discrete_action
            elif event.attributes['name'] in ['action_0_2', 'action_2_0']:
                assert event.interface_method == ActionEvents.continuous_action
            elif event.attributes['name'] in ['action_1_1']:
                assert event.interface_method == ActionEvents.multi_binary_action
            else:
                raise ValueError(f"Unexpected action event name: {event.attributes['name']}")
Example #6
def _test_dummy_env_for_split_config(split_config: Dict[str, Dict]) -> None:
    """Test the split action wrapper on the dummy env for a given split_config

    :param split_config: The split action config to apply and test.
    """
    base_env = build_dummy_maze_env()

    env = SplitActionsWrapper.wrap(base_env, split_config=split_config)

    _ = env.action_space  # accessing the split action space should not raise

    for key, sub_actions in split_config.items():
        for sub_key in sub_actions.keys():
            assert sub_key in env.action_space.spaces.keys()
        assert key not in env.action_space.spaces.keys()

    assert env.action_spaces_dict[0] == env.action_space

    base_env_action = base_env.action_space.sample()
    split_action = env.reverse_action(base_env_action)

    for key, sub_actions in split_config.items():
        for sub_key in sub_actions.keys():
            assert sub_key in split_action
        assert key not in split_action

    for key in base_env.action_space.spaces.keys():
        assert np.all(base_env_action[key] == env.action(split_action)[key])
Example #7
def test_rollout_evaluator():
    env = SequentialVectorEnv([lambda: TimeLimitWrapper.wrap(build_dummy_maze_env(), max_episode_steps=2)] * 2)
    policy = flatten_concat_probabilistic_policy_for_env(build_dummy_maze_env())
    model_selection = _MockModelSelection()

    evaluator = RolloutEvaluator(eval_env=env, n_episodes=3, model_selection=model_selection)
    for i in range(2):
        evaluator.evaluate(policy)
        increment_log_step()

    assert model_selection.update_count == 2
    assert evaluator.eval_env.get_stats_value(
        BaseEnvEvents.reward,
        LogStatsLevel.EPOCH,
        name="total_episode_count"
    ) >= 2 * 3
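
The _MockModelSelection helper referenced above is not shown on this page. A plausible stand-in is sketched below; the update() callback name and signature are assumptions inferred from how the evaluator reports results, not confirmed API:

class _MockModelSelection:
    """Assumed stand-in: counts how often the evaluator reports back."""

    def __init__(self):
        self.update_count = 0

    def update(self, value: float) -> None:
        """Assumed callback invoked by RolloutEvaluator after each evaluation run."""
        self.update_count += 1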
Example #8
def test_actor_id_is_optional_for_single_network_policies():
    env = build_dummy_maze_env()
    policy = flatten_concat_probabilistic_policy_for_env(env)

    obs = env.reset()
    action = policy.compute_action(obs)  # No actor ID provided
    assert all([key in env.action_space.spaces for key in action.keys()])
Example #9
def test_observation_monitoring():
    """ Observation logging unit test """

    # instantiate env
    env = build_dummy_maze_env()

    env = MazeEnvMonitoringWrapper.wrap(env,
                                        observation_logging=True,
                                        action_logging=False,
                                        reward_logging=False)
    env = LogStatsWrapper.wrap(env)  # for accessing events from previous steps
    env.reset()

    # test application of wrapper
    for ii in range(3):
        # Observation will get reported in the next step (when the agent is actually acting on it)
        obs = env.step(env.action_space.sample())[0]

        observation_events = env.get_last_step_events(query=[
            ObservationEvents.observation_original,
            ObservationEvents.observation_processed
        ])
        assert len(observation_events) == 4
        for event in observation_events:
            assert issubclass(event.interface_class, ObservationEvents)
            obs_name = event.attributes['name']
            assert obs_name in ['observation_0', 'observation_1']
            if ii > 0:
                assert np.allclose(np.asarray(obs[obs_name]),
                                   np.asarray(event.attributes['value']))
Example #10
def test_reward_monitoring():
    """ Reward logging unit test """

    # instantiate env
    env = build_dummy_maze_env()

    env = MazeEnvMonitoringWrapper.wrap(env,
                                        observation_logging=False,
                                        action_logging=False,
                                        reward_logging=True)
    env = LogStatsWrapper.wrap(env)  # for accessing events from previous steps
    env.reset()
    env.step(env.action_space.sample())

    # test application of wrapper
    for ii in range(2):
        env.step(env.action_space.sample())

        reward_events = env.get_last_step_events(query=[
            RewardEvents.reward_original, RewardEvents.reward_processed
        ])

        assert len(reward_events) == 2
        for event in reward_events:
            assert issubclass(event.interface_class, RewardEvents)
            assert event.attributes['value'] == 10
            assert event.interface_method in [
                RewardEvents.reward_original, RewardEvents.reward_processed
            ]
Example #11
def test_does_not_carry_over_stats_from_unfinished_episodes():
    policy = flatten_concat_probabilistic_policy_for_env(build_dummy_maze_env())

    # Wrap envs in a time-limit wrapper
    env = SequentialVectorEnv([lambda: TimeLimitWrapper.wrap(build_dummy_maze_env())] * 2)

    # Make one env slower than the other
    env.envs[0].set_max_episode_steps(2)
    env.envs[1].set_max_episode_steps(10)

    evaluator = RolloutEvaluator(eval_env=env, n_episodes=1, model_selection=None)
    for i in range(2):
        evaluator.evaluate(policy)
        increment_log_step()

        # We should get just one episode counted in stats
        assert evaluator.eval_env.get_stats_value(
            BaseEnvEvents.reward,
            LogStatsLevel.EPOCH,
            name="episode_count"
        ) == 1
Example #12
def test_records_events_in_reset():
    env = build_dummy_maze_env()
    env = _EventsInResetWrapper.wrap(env)
    env = LogStatsWrapper.wrap(env)

    env.reset()
    for i in range(5):
        env.step(env.action_space.sample())

    env.write_epoch_stats()
    assert env.get_stats_value(
        BaseEnvEvents.test_event, LogStatsLevel.EPOCH
    ) == 1  # only from the single event fired during env reset
Example #13
def test_supports_trajectory_recording_wrapper():
    """
    Tests whether agent integration supports trajectory recording wrappers.
    """

    class TestWriter(TrajectoryWriter):
        """Mock writer for checking that trajectory recording goes through."""

        def __init__(self):
            self.step_count = 0

        def write(self, episode_record: StateTrajectoryRecord):
            """Count recorded steps"""
            self.step_count += len(episode_record.step_records)
            assert episode_record.renderer is not None

    step_count = 10

    writer = TestWriter()
    TrajectoryWriterRegistry.writers = []  # Ensure there is no other writer
    TrajectoryWriterRegistry.register_writer(writer)

    agent_deployment = AgentDeployment(
        policy=DummyGreedyPolicy(),
        env=TrajectoryRecordingWrapper.wrap(build_dummy_maze_env()),
    )

    # Step the environment manually here and query the agent integration wrapper for maze_actions
    test_core_env = build_dummy_maze_env().core_env
    maze_state = test_core_env.reset()
    reward, done, info = None, None, None
    for _ in range(step_count):
        maze_action = agent_deployment.act(maze_state, reward, done, info)
        maze_state, reward, done, info = test_core_env.step(maze_action)

    # Rollout needs to be finished to notify the wrappers
    agent_deployment.close(maze_state, reward, done, info)

    assert writer.step_count == step_count + 1  # count terminal state as well
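
Beyond counting steps, a real TrajectoryWriter would usually persist the episode records. The sketch below follows the same TrajectoryWriter/register_writer pattern as the test above and relies only on os and pickle as used elsewhere on this page; the output directory and file naming are illustrative assumptions:

class PickleTrajectoryWriter(TrajectoryWriter):
    """Illustrative writer: dumps each finished episode record into its own pickle file."""

    def __init__(self, output_dir: str = "trajectory_records"):
        self.output_dir = output_dir
        self.episode_count = 0
        os.makedirs(output_dir, exist_ok=True)

    def write(self, episode_record: StateTrajectoryRecord):
        """Serialize the full episode record (one file per episode)."""
        file_path = os.path.join(self.output_dir, "episode_{}.pkl".format(self.episode_count))
        with open(file_path, "wb") as out_f:
            pickle.dump(episode_record, out_f)
        self.episode_count += 1

TrajectoryWriterRegistry.register_writer(PickleTrajectoryWriter())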
Example #14
def test_time_limit_wrapper():
    """ time limit wrapper unit tests """
    env = build_dummy_maze_env()
    env = TimeLimitWrapper.wrap(env, max_episode_steps=5)
    env.set_max_episode_steps(max_episode_steps=5)  # exercise the explicit setter as well

    env.seed(1234)
    env.reset()
    for i in range(5):
        obs, rew, done, info = env.step(env.action_space.sample())
        if i >= 4:
            assert done
    env.close()
Example #15
def test_terminates_on_done():
    """Resetting the env or terminating rollout early when the env is done."""
    env = build_dummy_maze_env()
    env = TimeLimitWrapper.wrap(env, max_episode_steps=5)
    policy = RandomPolicy(env.action_spaces_dict)

    # Normal operation (should reset the env automatically and continue rollout)
    rollout_generator = RolloutGenerator(env=env)
    trajectory = rollout_generator.rollout(policy, n_steps=10)
    assert len(trajectory) == 10

    # Terminate on done
    rollout_generator = RolloutGenerator(env=env, terminate_on_done=True)
    trajectory = rollout_generator.rollout(policy, n_steps=10)
    assert len(trajectory) == 5
Example #16
def test_dummy_maze_env():
    """
    Unit test for the DummyEnvironment
    """
    env = build_dummy_maze_env()
    _ = env.reset()

    for _ in range(10):
        action = env.action_space.sample()
        observation, _, _, _ = env.step(action)

    # check if actor is done
    assert not env.is_actor_done()

    env.close()
Example #17
def test_dummy_struct_env():
    """
    Unit test for the DummyStructuredEnvironment
    """
    maze_env = build_dummy_maze_env()
    env = DummyStructuredEnvironment(maze_env)
    env.reset()

    # check observation space
    assert isinstance(env.observation_space, spaces.Dict)

    for i in range(10):
        action = env.action_spaces_dict[env.actor_id()[0]].sample()
        observation, _, _, _ = env.step(action)

    env.close()
Example #18
def test_records_policy_events():
    env = build_dummy_maze_env()
    env = LogStatsWrapper.wrap(env)

    base_events = env.core_env.context.event_service.create_event_topic(BaseEnvEvents)
    env.reset()
    for i in range(5):
        # Simulate firing an event from the policy (i.e., outside of env.step)
        base_events.test_event(1)
        env.step(env.action_space.sample())

    env.write_epoch_stats()
    assert env.get_stats_value(
        BaseEnvEvents.test_event,
        LogStatsLevel.EPOCH) == 5  # value of 1 x 5 steps
Example #19
def test_dummy_model_with_dummy_network():
    """
    Unit test for the dummy policy and value networks on the dummy environment
    """
    maze_env = build_dummy_maze_env()

    # init the distribution_mapper with the flat action space
    distribution_mapper_config = [{
        "action_space": spaces.Box,
        "distribution": "maze.distributions.squashed_gaussian.SquashedGaussianProbabilityDistribution"
    }]
    distribution_mapper = DistributionMapper(
        action_space=maze_env.action_space,
        distribution_mapper_config=distribution_mapper_config)

    obs_shapes = observation_spaces_to_in_shapes(
        maze_env.observation_spaces_dict)

    dummy_actor = DummyPolicyNet(
        obs_shapes=obs_shapes[0],
        action_logits_shapes={
            key: distribution_mapper.required_logits_shape(key)
            for key in maze_env.action_space.spaces.keys()
        },
        non_lin=nn.Tanh)

    dummy_critic = DummyValueNet(obs_shapes=obs_shapes[0], non_lin=nn.Tanh)

    obs_np = maze_env.reset()
    obs = {k: torch.from_numpy(v) for k, v in obs_np.items()}

    for i in range(100):
        logits_dict = dummy_actor(obs)
        prob_dist = distribution_mapper.logits_dict_to_distribution(
            logits_dict=logits_dict, temperature=1.0)
        sampled_actions = prob_dist.sample()

        obs_np, _, _, _ = maze_env.step(sampled_actions)
        obs = {k: torch.from_numpy(v) for k, v in obs_np.items()}

        _ = dummy_critic(obs)
    maze_env.close()
Example #20
def test_time_limit_wrapper_with_spec():
    """ time limit wrapper unit tests """
    class Spec:
        def __init__(self):
            self.max_episode_steps = 5

    spec = Spec()

    env = build_dummy_maze_env()
    env.__setattr__("spec", spec)
    env = TimeLimitWrapper.wrap(env, max_episode_steps=None)

    env.seed(1234)
    env.reset()
    for i in range(5):
        obs, rew, done, info = env.step(env.action_space.sample())
        if i >= 4:
            assert done
    env.close()
Example #21
def _test_dummy_env_for_discretization_config(discretization_config: Dict[str, Dict]) -> None:
    """Test the Discretize action wrapper on the dummy env for a given discretization_config.

    :param discretization_config: The Discretize actions config to apply and test.
    """
    base_env = build_dummy_maze_env()

    env = DiscretizeActionsWrapper.wrap(
        base_env, discretization_config=discretization_config)

    assert isinstance(env.action_space, spaces.Dict)
    for action_name, new_action_config in discretization_config.items():
        assert action_name in env.action_space.spaces.keys()
        if base_env.action_space[action_name].shape[-1] == 1:
            assert isinstance(env.action_space[action_name], spaces.Discrete)
            assert env.action_space[action_name].n == new_action_config['num_bins']
        else:
            assert isinstance(env.action_space[action_name], spaces.MultiDiscrete)
            assert np.all(env.action_space[action_name].nvec ==
                          np.array([new_action_config['num_bins']] * base_env.action_space[action_name].shape[-1]))

    env.reset()
    for i in range(1):
        action = env.action_space.sample()
        env.step(action)
        reverse_action = env.action(action)
        restored_action = env.reverse_action(reverse_action)
        assert isinstance(action, dict)
        for action_name, action_config in discretization_config.items():
            assert action_name in action
            if base_env.action_space[action_name].shape[-1] == 1:
                assert action[action_name] in spaces.Discrete(action_config['num_bins'])
            else:
                nvec = np.array([action_config['num_bins']] * base_env.action_space[action_name].shape[-1])
                assert action[action_name] in spaces.MultiDiscrete(nvec)
            assert reverse_action[action_name] in base_env.action_space[action_name]
            assert np.all(restored_action[action_name] == action[action_name])
Example #22
def test_records_stats():
    # the default simple setup: flat, single-step env, no step skipping etc.
    env = build_dummy_maze_env()
    env = LogStatsWrapper.wrap(env)

    env.reset()
    for i in range(5):
        env.step(env.action_space.sample())

    # both step counts seen from outside and seen from core env should correspond to 5

    env.write_epoch_stats()
    assert env.get_stats_value(RewardEvents.reward_original,
                               LogStatsLevel.EPOCH,
                               name="total_step_count") == 5

    assert env.get_stats_value(BaseEnvEvents.reward,
                               LogStatsLevel.EPOCH,
                               name="total_step_count") == 5
Example #23
def test_step_increment_in_single_step_core_env():
    """In single sub-step envs, events should be cleared out and env time incremented automatically."""
    env = build_dummy_maze_env()
    env = LogStatsWrapper.wrap(env)

    env.reset()
    assert env.get_env_time() == 0

    # 10 steps
    for _ in range(10):
        env.step(env.action_space.sample())

    assert env.get_env_time() == 10
    env.reset()

    increment_log_step()

    assert env.get_stats_value(BaseEnvEvents.reward,
                               LogStatsLevel.EPOCH,
                               name="total_step_count") == 10
Example #24
def test_records_multiple_episodes():
    env = build_dummy_maze_env()
    env = SpacesRecordingWrapper.wrap(env, output_dir="space_records")

    env.reset()
    for _ in range(5):
        for _ in range(10):
            action = env.action_space.sample()
            env.step(action)
        env.reset()

    dumped_files = os.listdir("space_records")
    assert len(dumped_files) == 5

    for file_path in dumped_files:
        with open("space_records/" + file_path, "rb") as in_f:
            episode_record = pickle.load(in_f)

        assert isinstance(episode_record, SpacesTrajectoryRecord)
        assert len(episode_record.step_records) == 10
Example #25
def test_compatibility_with_dataset():
    env = build_dummy_maze_env()
    env = SpacesRecordingWrapper.wrap(env, output_dir="space_records")

    # Generate 5 episodes, 10 steps each
    env.reset()
    for _ in range(5):
        for _ in range(10):
            action = env.action_space.sample()
            env.step(action)
        env.reset()

    dataset = InMemoryDataset(
        n_workers=2,
        conversion_env_factory=None,
        input_data="space_records",
        trajectory_processor=IdentityTrajectoryProcessor(),
        deserialize_in_main_thread=False)

    assert len(dataset) == 5 * 10
Example #26
def test_handles_step_skipping_in_reset():
    env = build_dummy_maze_env()
    env = _StepInResetWrapper.wrap(env)
    env = LogStatsWrapper.wrap(env)

    env.reset()
    # Step the env once (should be the third step -- first two were done in the reset)
    env.step(env.action_space.sample())

    # Events should be collected for 3 steps in total -- two from the env reset done by the wrapper + one done above
    assert len(env.episode_event_log.step_event_logs) == 3

    # The same goes for "original reward" stats
    env.write_epoch_stats()
    assert env.get_stats_value(RewardEvents.reward_original,
                               LogStatsLevel.EPOCH,
                               name="total_step_count") == 3

    # The step count from outside is still one (as normal reward events should not be fired for "skipped" steps)
    assert env.get_stats_value(BaseEnvEvents.reward,
                               LogStatsLevel.EPOCH,
                               name="total_step_count") == 1
Example #27
def test_handles_step_skipping_in_step():
    env = build_dummy_maze_env()
    env = _StepInStepWrapper.wrap(env)
    env = LogStatsWrapper.wrap(env)

    # Step the env twice (should correspond to four core-env steps)
    env.reset()
    for i in range(2):
        env.step(env.action_space.sample())

    # => events should be collected for 4 steps in total
    assert len(env.episode_event_log.step_event_logs) == 4

    # The same goes for "original reward" stats
    env.write_epoch_stats()
    assert env.get_stats_value(RewardEvents.reward_original,
                               LogStatsLevel.EPOCH,
                               name="total_step_count") == 4

    # The step count from outside is still just two (as normal reward events should not be fired for "skipped" steps)
    assert env.get_stats_value(BaseEnvEvents.reward,
                               LogStatsLevel.EPOCH,
                               name="total_step_count") == 2
Example #28
def test_records_episode_with_correct_data():
    env = build_dummy_maze_env()
    env = SpacesRecordingWrapper.wrap(env, output_dir="space_records")

    actions = []
    observations = []

    observation = env.reset()
    for _ in range(5):
        observations.append(observation)
        action = env.action_space.sample()
        actions.append(action)
        observation, _, _, _ = env.step(action)

    episode_id = env.get_episode_id()
    expected_file_path = str(episode_id) + ".pkl"
    assert expected_file_path not in os.listdir("space_records")

    # Now dump and load the data
    env.reset()
    assert expected_file_path in os.listdir("space_records")
    with open("space_records/" + expected_file_path, "rb") as in_f:
        episode_record = pickle.load(in_f)

    # Check the contents
    assert isinstance(episode_record, SpacesTrajectoryRecord)
    assert len(episode_record.step_records) == len(actions)
    for record, observation, action in zip(episode_record.step_records,
                                           observations, actions):
        assert len(record.actions) == 1 and len(record.observations) == 1  # single-step env
        for obs_key in observation:
            assert np.allclose(record.observations[0][obs_key],
                               observation[obs_key])
        for act_key in action:
            assert np.allclose(record.actions[0][act_key], action[act_key])
Example #29
def test_log_action_events_dict_discrete():
    """ action logging unit tests """
    env = build_dummy_maze_env()
    train(env)
Example #30
def test_default_action_space_sampling():
    env = build_dummy_maze_env()
    policy = RandomPolicy(env.action_spaces_dict)
    action = policy.compute_action(observation=env.observation_space.sample(), maze_state=None)
    assert action in env.action_space