def test_step_1_valid_when_env_not_done(self, env_spec: EnvSpec,
                                        helpers: Helpers) -> None:
    """Check that ``step`` is called exactly once with the sampled action(s).

    ``wrapped_env.step`` is swapped for a mock while it is invoked, so this
    test only verifies the arguments the mock receives.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection used to build and seed the env.
    """
    wrapped_env, _ = helpers.get_wrapped_env(env_spec)

    # Actions are sampled below, so seed both the env and its action spaces.
    seed = 42
    wrapped_env.seed(seed)
    helpers.seed_action_space(wrapped_env, seed)

    agent_names = wrapped_env.agents

    if env_spec.env_type == EnvType.Parallel:
        # Parallel envs receive one dict holding an action per agent.
        joint_action = {
            name: wrapped_env.action_spaces[name].sample()
            for name in agent_names
        }
        with patch.object(wrapped_env, "step") as mocked_step:
            mocked_step.return_value = None, None, None, None
            wrapped_env.step(joint_action)
            mocked_step.assert_called_once_with(joint_action)
    elif env_spec.env_type == EnvType.Sequential:
        # Sequential envs receive a single action per call; a fresh mock is
        # installed for every agent so "called once" holds per agent.
        for name in agent_names:
            with patch.object(wrapped_env, "step") as mocked_step:
                mocked_step.return_value = None
                single_action = wrapped_env.action_spaces[name].sample()
                wrapped_env.step(single_action)
                mocked_step.assert_called_once_with(single_action)
def test_wrapper_env_reset(self, env_spec: EnvSpec, helpers: Helpers) -> None:
    """Check that ``reset`` yields a populated FIRST timestep for all agents.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection used to build the env and run the
            shared assertions.
    """
    wrapped_env, _ = helpers.get_wrapped_env(env_spec)
    num_agents = len(wrapped_env.agents)

    timestep = wrapped_env.reset()
    # Exact type check on purpose: a plain tuple means (timestep, extras).
    # NOTE(review): isinstance() would also match tuple subclasses, which the
    # timestep object itself may be (e.g. a NamedTuple) - confirm before
    # changing this check.
    if type(timestep) is tuple:
        dm_env_timestep, _ = timestep
    else:
        dm_env_timestep = timestep

    props_which_should_not_be_none = [
        dm_env_timestep,
        dm_env_timestep.observation,
    ]
    assert helpers.verify_all_props_not_none(
        props_which_should_not_be_none), "Failed to ini dm_env_timestep."
    assert (dm_env_timestep.step_type == dm_env.StepType.FIRST
            ), "Failed to have correct StepType."

    # The remaining per-agent checks are skipped for sequential OpenSpiel
    # tic_tac_toe; the checks above still run for it.
    if (env_spec.env_name == "tic_tac_toe"
            and env_spec.env_source == EnvSource.OpenSpiel
            and env_spec.env_type == EnvType.Sequential):
        pytest.skip(
            "This test is only applicable to parralel wrappers and only works "
            "for the provided PZ sequential envs because they have 3 agents, and "
            "an OLT has length of 3 (a bug, i'd say)")

    assert (len(dm_env_timestep.observation) == num_agents
            ), "Failed to generate observation for all agents."
    assert wrapped_env._reset_next_step is False, "_reset_next_step not set."

    helpers.assert_env_reset(wrapped_env, dm_env_timestep, env_spec)
def test_step_0_valid_when_env_not_done(self, env_spec: EnvSpec,
                                        helpers: Helpers) -> None:
    """Check that stepping a freshly reset env produces a new MID timestep.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection used to build and seed the env.
    """
    wrapped_env, _ = helpers.get_wrapped_env(env_spec)

    # Actions are sampled below, so seed both the env and its action spaces.
    seed = 42
    wrapped_env.seed(seed)
    helpers.seed_action_space(wrapped_env, seed)

    agent_names = wrapped_env.agents

    reset_output = wrapped_env.reset()
    # Exact type check on purpose: a plain tuple means (timestep, extras).
    # NOTE(review): the timestep itself may be a tuple subclass, so
    # isinstance() is presumably not a safe replacement - confirm.
    if type(reset_output) is tuple:
        first_timestep, _ = reset_output
    else:
        first_timestep = reset_output

    if env_spec.env_type == EnvType.Parallel:
        joint_action = {
            name: wrapped_env.action_spaces[name].sample()
            for name in agent_names
        }
        curr_dm_timestep = wrapped_env.step(joint_action)

        for name in wrapped_env.agents:
            assert not np.array_equal(
                first_timestep.observation[name].observation,
                curr_dm_timestep.observation[name].observation,
            ), "Failed to update observations."
    elif env_spec.env_type == EnvType.Sequential:
        curr_dm_timestep = first_timestep
        for name in agent_names:
            if env_spec.env_source == EnvSource.OpenSpiel:
                # Pick among the indices flagged in legal_actions. This draws
                # from NumPy's global RNG, which is not seeded in this test.
                legal_ids = np.where(
                    curr_dm_timestep.observation.legal_actions)[0]
                chosen_action = np.random.choice(legal_ids)
            else:
                chosen_action = wrapped_env.action_spaces[name].sample()

            curr_dm_timestep = wrapped_env.step(chosen_action)

            assert not np.array_equal(
                first_timestep.observation.observation,
                curr_dm_timestep.observation.observation,
            ), "Failed to update observations."

    assert (wrapped_env._reset_next_step is
            False), "Failed to set _reset_next_step correctly."
    assert curr_dm_timestep.reward is not None, "Failed to set rewards."
    assert (curr_dm_timestep.step_type is
            dm_env.StepType.MID), "Failed to update step type."
def test_valid_episode(self, env_spec: EnvSpec, helpers: Helpers) -> None:
    """Run a single episode through the env loop and validate its result.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection providing the env, the env-loop
            factory and the episode assertion.
    """
    wrapped_env, specs = helpers.get_wrapped_env(env_spec)
    make_env_loop = helpers.get_env_loop(env_spec)

    mocked_system = MockedSystem(specs)
    episode_result = make_env_loop(wrapped_env, mocked_system).run_episode()

    helpers.assert_valid_episode(episode_result)
def test_valid_multiple_episodes(self, env_spec: EnvSpec,
                                 helpers: Helpers) -> None:
    """Alternate training and evaluation runs on the same wrapped env.

    Each round runs ``num_episodes_per_eval`` training episodes followed by
    one evaluation episode.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection providing the env and the env-loop
            factory.
    """
    wrapped_env, specs = helpers.get_wrapped_env(env_spec)
    make_env_loop = helpers.get_env_loop(env_spec)

    train_loop = make_env_loop(
        wrapped_env,
        MockedSystem(specs),
        label="train_loop",
    )
    eval_loop = make_env_loop(
        wrapped_env,
        MockedExecutor(specs),
        label="eval_loop",
    )

    num_episodes = 10
    num_episodes_per_eval = 2
    num_rounds = num_episodes // num_episodes_per_eval

    for _ in range(num_rounds):
        train_loop.run(num_episodes=num_episodes_per_eval)
        eval_loop.run(num_episodes=1)
def test_preprocess_wrapper_obs_1_standardize(
    self,
    env_spec: EnvSpec,
    helpers: Helpers,
    monkeypatch: MonkeyPatch,
) -> None:
    """Check observations after each of 50 steps with a standardize wrapper.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection used to build the env and verify
            the observations.
        monkeypatch: pytest fixture; not used by this test.
    """
    # Choose the wrapper class matching the env type.
    if env_spec.env_type == EnvType.Parallel:
        StandardizeObservation = StandardizeObservationParallel
    elif env_spec.env_type == EnvType.Sequential:
        StandardizeObservation = StandardizeObservationSequential

    preprocess_wrappers = [
        (dtype_v0, {"dtype": np.float32}),
        (StandardizeObservation, None),
    ]
    wrapped_env, _ = helpers.get_wrapped_env(
        env_spec,
        env_preprocess_wrappers=preprocess_wrappers,
    )

    wrapped_env.reset()
    agent_names = wrapped_env.agents

    if env_spec.env_type == EnvType.Parallel:
        # The joint action is sampled once and reused for every step.
        joint_action = {
            name: wrapped_env.action_spaces[name].sample()
            for name in agent_names
        }
        for _ in range(50):
            curr_dm_timestep = wrapped_env.step(joint_action)
            helpers.verify_observations_are_standardized(
                curr_dm_timestep.observation, agent_names, env_spec)
    elif env_spec.env_type == EnvType.Sequential:
        for _ in range(50):
            for name in agent_names:
                single_action = wrapped_env.action_spaces[name].sample()
                curr_dm_timestep = wrapped_env.step(single_action)
                helpers.verify_observations_are_standardized(
                    curr_dm_timestep.observation, agent_names, env_spec)
def test_covert_env_to_dm_env_1_with_action_mask(self, env_spec: EnvSpec,
                                                 helpers: Helpers) -> None:
    """Check observation conversion when observations carry an action mask.

    Random ``observation``/``action_mask`` dicts are fed to the wrapper's
    conversion method and the converted result must expose the same arrays
    and a falsy ``terminal`` flag.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection used to build the env.
    """
    wrapped_env, _ = helpers.get_wrapped_env(env_spec)

    # Nothing to check when the wrapper exposes neither conversion method.
    if not hasattr(wrapped_env, "_convert_observations") and not hasattr(
            wrapped_env, "_convert_observation"):
        return

    agent_names = wrapped_env.agents

    # TODO If cont action space masking is implemented - Update
    raw_observations = {
        name: {
            "observation":
            np.random.rand(*wrapped_env.observation_spaces[name].shape),
            "action_mask":
            np.random.randint(2, size=wrapped_env.action_spaces[name].shape),
        }
        for name in agent_names
    }

    if env_spec.env_type == EnvType.Parallel:
        # Parallel wrappers convert all agents' observations in one call.
        not_done = dict.fromkeys(agent_names, False)
        converted = wrapped_env._convert_observations(
            raw_observations,
            dones=not_done,
        )

        for name in wrapped_env.agents:
            expected = raw_observations[name]
            np.testing.assert_array_equal(
                expected["observation"],
                converted[name].observation,
            )
            np.testing.assert_array_equal(
                expected["action_mask"],
                converted[name].legal_actions,
            )
            assert not bool(
                converted[name].terminal), "Failed to set terminal."
    elif env_spec.env_type == EnvType.Sequential:
        # Sequential wrappers convert one agent's observation per call.
        for name in agent_names:
            expected = raw_observations[name]
            converted = wrapped_env._convert_observation(
                name, expected, done=False)

            np.testing.assert_array_equal(
                expected["observation"],
                converted.observation,
            )
            np.testing.assert_array_equal(
                expected["action_mask"],
                converted.legal_actions,
            )
            assert not bool(converted.terminal), "Failed to set terminal."
def test_initialize_env_loop(self, env_spec: EnvSpec,
                             helpers: Helpers) -> None:
    """Check that a newly built env loop has its core attributes set.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection providing the env, the env-loop
            factory and the not-None check.
    """
    wrapped_env, specs = helpers.get_wrapped_env(env_spec)
    make_env_loop = helpers.get_env_loop(env_spec)

    env_loop = make_env_loop(wrapped_env, MockedSystem(specs))

    # The loop itself plus every private attribute that must be populated.
    required_props = [env_loop]
    required_props.append(env_loop._environment)
    required_props.append(env_loop._executor)
    required_props.append(env_loop._counter)
    required_props.append(env_loop._logger)
    required_props.append(env_loop._should_update)

    all_set = helpers.verify_all_props_not_none(required_props)
    assert all_set, "Failed to initialize env loop."
def test_wrapper_initialization(self, env_spec: EnvSpec,
                                helpers: Helpers) -> None:
    """Check that the wrapped env exposes one spec entry per agent.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection used to build the env and run the
            not-None check.
    """
    wrapped_env, _ = helpers.get_wrapped_env(env_spec)
    expected_count = len(wrapped_env.agents)

    # First make sure the env, its inner environment and all specs exist.
    required_props = [
        wrapped_env,
        wrapped_env.environment,
        wrapped_env.observation_spec(),
        wrapped_env.action_spec(),
        wrapped_env.reward_spec(),
        wrapped_env.discount_spec(),
    ]
    all_set = helpers.verify_all_props_not_none(required_props)
    assert all_set, "Failed to ini wrapped env."

    # Then make sure every spec has exactly one entry per agent.
    observation_count = len(wrapped_env.observation_spec())
    assert (observation_count == expected_count
            ), "Failed to generate observation specs for all agents."

    action_count = len(wrapped_env.action_spec())
    assert (action_count == expected_count
            ), "Failed to generate action specs for all agents."

    reward_count = len(wrapped_env.reward_spec())
    assert (reward_count == expected_count
            ), "Failed to generate reward specs for all agents."

    discount_count = len(wrapped_env.discount_spec())
    assert (discount_count == expected_count
            ), "Failed to generate discount specs for all agents."
def test_preprocess_wrapper_obs_0_normalize(
    self,
    env_spec: EnvSpec,
    helpers: Helpers,
    monkeypatch: MonkeyPatch,
) -> None:
    """Check observations at reset and over 50 steps with ``normalize_obs_v0``.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection used to build the env and verify
            the observations.
        monkeypatch: pytest fixture; not used by this test.
    """
    preprocess_wrappers = [
        (dtype_v0, {"dtype": np.float32}),
        (normalize_obs_v0, None),
    ]
    wrapped_env, _ = helpers.get_wrapped_env(
        env_spec,
        env_preprocess_wrappers=preprocess_wrappers,
    )

    reset_output = wrapped_env.reset()
    # Exact type check on purpose: a plain tuple means (timestep, extras).
    # NOTE(review): the timestep itself may be a tuple subclass, so
    # isinstance() is presumably not a safe replacement - confirm.
    if type(reset_output) is tuple:
        first_timestep, _ = reset_output
    else:
        first_timestep = reset_output

    agent_names = wrapped_env.agents

    # The very first observation is verified as well.
    helpers.verify_observations_are_normalized(first_timestep.observation,
                                               agent_names, env_spec)

    if env_spec.env_type == EnvType.Parallel:
        for _ in range(50):
            # A fresh joint action is sampled for every step.
            joint_action = {
                name: wrapped_env.action_spaces[name].sample()
                for name in agent_names
            }
            curr_dm_timestep = wrapped_env.step(joint_action)
            helpers.verify_observations_are_normalized(
                curr_dm_timestep.observation, agent_names, env_spec)
    elif env_spec.env_type == EnvType.Sequential:
        for _ in range(50):
            for name in agent_names:
                single_action = wrapped_env.action_spaces[name].sample()
                curr_dm_timestep = wrapped_env.step(single_action)
                helpers.verify_observations_are_normalized(
                    curr_dm_timestep.observation, agent_names, env_spec)
def test_covert_env_to_dm_env_0_no_action_mask(self, env_spec: EnvSpec,
                                               helpers: Helpers) -> None:
    """Check observation conversion for plain array observations.

    Random arrays are fed to the wrapper's conversion method and the
    converted result must expose the same array and a falsy ``terminal``
    flag.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection used to build the env.
    """
    wrapped_env, _ = helpers.get_wrapped_env(env_spec)

    # Nothing to check when the wrapper exposes neither conversion method.
    if not hasattr(wrapped_env, "_convert_observations") and not hasattr(
            wrapped_env, "_convert_observation"):
        return

    agent_names = wrapped_env.agents
    raw_observations = {
        name: np.random.rand(*wrapped_env.observation_spaces[name].shape)
        for name in agent_names
    }

    if env_spec.env_type == EnvType.Parallel:
        # Parallel wrappers convert all agents' observations in one call.
        not_done = dict.fromkeys(agent_names, False)
        converted = wrapped_env._convert_observations(raw_observations,
                                                      dones=not_done)

        for name in wrapped_env.agents:
            np.testing.assert_array_equal(
                raw_observations[name],
                converted[name].observation,
            )
            assert not bool(
                converted[name].terminal), "Failed to set terminal."
    elif env_spec.env_type == EnvType.Sequential:
        # Sequential wrappers convert one agent's observation per call.
        for name in agent_names:
            converted = wrapped_env._convert_observation(
                name, raw_observations[name], done=False)

            np.testing.assert_array_equal(
                raw_observations[name],
                converted.observation,
            )
            assert not bool(converted.terminal), "Failed to set terminal."
def test_preprocess_wrapper_reward_1_custom_function(
    self,
    env_spec: EnvSpec,
    helpers: Helpers,
    monkeypatch: MonkeyPatch,
) -> None:
    """Check that a ``reward_lambda_v0`` reward function is applied.

    The reward function adds 100 to every reward, and the test asserts that
    each reward seen after stepping is at least 100.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection used to build the env.
        monkeypatch: pytest fixture; not used by this test.
    """
    preprocess_wrappers = [
        (dtype_v0, {"dtype": np.float32}),
        (reward_lambda_v0, {"change_reward_fn": lambda r: r + 100}),
    ]
    wrapped_env, _ = helpers.get_wrapped_env(
        env_spec,
        env_preprocess_wrappers=preprocess_wrappers,
    )

    wrapped_env.reset()
    agent_names = wrapped_env.agents

    if env_spec.env_type == EnvType.Parallel:
        joint_action = {
            name: wrapped_env.action_spaces[name].sample()
            for name in agent_names
        }
        curr_dm_timestep = wrapped_env.step(joint_action)

        # Rewards are indexed by agent for parallel envs.
        for name in agent_names:
            assert (curr_dm_timestep.reward[name] >=
                    100), "Failed custom reward shaping. "
    elif env_spec.env_type == EnvType.Sequential:
        # Sequential envs return a single reward per step.
        for name in agent_names:
            single_action = wrapped_env.action_spaces[name].sample()
            curr_dm_timestep = wrapped_env.step(single_action)
            assert (curr_dm_timestep.reward >=
                    100), "Failed custom reward shaping. "
def test_preprocess_wrapper_reward_0_normalize(
    self,
    env_spec: EnvSpec,
    helpers: Helpers,
    monkeypatch: MonkeyPatch,
) -> None:
    """Check rewards over 50 steps with a reward-standardizing wrapper.

    The wrapper is configured with bounds 0.2 and 1, and the same bounds are
    handed to the verification helper after every step.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection used to build the env and verify
            the rewards.
        monkeypatch: pytest fixture; not used by this test.
    """
    # Named so that the builtins ``min``/``max`` are not shadowed.
    lower_bound = 0.2
    upper_bound = 1

    # Choose the wrapper class matching the env type.
    if env_spec.env_type == EnvType.Parallel:
        StandardizeReward = StandardizeRewardParallel
    elif env_spec.env_type == EnvType.Sequential:
        StandardizeReward = StandardizeRewardSequential

    wrapped_env, _ = helpers.get_wrapped_env(
        env_spec,
        env_preprocess_wrappers=[
            (dtype_v0, {"dtype": np.float32}),
            (
                StandardizeReward,
                {"lower_bound": lower_bound, "upper_bound": upper_bound},
            ),
        ],
    )

    _ = wrapped_env.reset()
    agents = wrapped_env.agents

    if env_spec.env_type == EnvType.Parallel:
        for _ in range(50):
            # A fresh joint action is sampled for every step.
            test_agents_actions = {
                agent: wrapped_env.action_spaces[agent].sample()
                for agent in agents
            }
            curr_dm_timestep = wrapped_env.step(test_agents_actions)

            # The helper's keyword names are ``min``/``max``; only the local
            # variable names changed.
            helpers.verify_reward_is_normalized(
                curr_dm_timestep.reward,
                agents,
                env_spec,
                min=lower_bound,
                max=upper_bound,
            )
    elif env_spec.env_type == EnvType.Sequential:
        for _ in range(50):
            for agent in agents:
                test_agent_actions = wrapped_env.action_spaces[agent].sample()
                curr_dm_timestep = wrapped_env.step(test_agent_actions)

                helpers.verify_reward_is_normalized(
                    curr_dm_timestep.reward,
                    agents,
                    env_spec,
                    min=lower_bound,
                    max=upper_bound,
                )
def test_loadmodule(self, env_spec: EnvSpec, helpers: Helpers) -> None:
    """Check that the raw env loads and exposes its agent collections.

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection used to build the env and run the
            not-None check.
    """
    env = helpers.get_env(env_spec)

    required_props = [env, env.agents, env.possible_agents]
    loaded = helpers.verify_all_props_not_none(required_props)

    assert loaded, "Failed to load module"
def test_step_2_invalid_when_env_done(self, env_spec: EnvSpec,
                                      helpers: Helpers,
                                      monkeypatch: MonkeyPatch) -> None:
    """Check that stepping a "done" env yields LAST and schedules a reset.

    The done state is simulated by monkeypatching ``env_done`` (and, for
    sequential envs, parts of the underlying environment).

    Args:
        env_spec: specification of the environment under test.
        helpers: test helper collection providing the env, the mocked done
            function and the reset assertions.
        monkeypatch: pytest fixture used to install the mocks.
    """
    wrapped_env, _ = helpers.get_wrapped_env(env_spec)

    if env_spec.env_source == EnvSource.OpenSpiel:
        pytest.skip("Open Spiel does not use the .last() method")

    # Actions are sampled below, so seed both the env and its action spaces.
    seed = 42
    wrapped_env.seed(seed)
    helpers.seed_action_space(wrapped_env, seed)

    wrapped_env.reset()
    agent_names = wrapped_env.agents

    if env_spec.env_type == EnvType.Parallel:
        joint_action = {
            name: wrapped_env.action_spaces[name].sample()
            for name in agent_names
        }

        # Pretend the env is done before taking the step.
        monkeypatch.setattr(wrapped_env, "env_done", helpers.mock_done)

        curr_dm_timestep = wrapped_env.step(joint_action)

        helpers.assert_env_reset(wrapped_env, curr_dm_timestep, env_spec)

    # TODO (Kale-ab): Make this part below less reliant on PZ.
    elif env_spec.env_type == EnvType.Sequential:
        n_agents = wrapped_env.num_agents
        final_index = n_agents - 1

        # Stand-ins that make the underlying PZ environment look finished.
        def mock_environment_last() -> Any:
            # `agent` is looked up when this is called, i.e. it refers to
            # the current value of the loop variable below.
            sampled_obs = wrapped_env.observation_spaces[agent].sample()
            return sampled_obs, 0.0, True, {}

        def mock_step(action: types.Action) -> None:
            return None

        # raising=False: no error is thrown if the attribute does not exist.
        inner_env = wrapped_env._environment
        monkeypatch.setattr(inner_env,
                            "last",
                            mock_environment_last,
                            raising=False)
        monkeypatch.setattr(inner_env, "step", mock_step, raising=False)

        for index, agent in enumerate(wrapped_env.agent_iter(n_agents)):
            single_action = wrapped_env.action_spaces[agent].sample()

            # Mark the whole env as done once the final agent is reached.
            if index == final_index:
                monkeypatch.setattr(wrapped_env, "env_done",
                                    helpers.mock_done)

            # Pretend an update has occurred in step.
            monkeypatch.setattr(wrapped_env._environment,
                                "_has_updated",
                                True,
                                raising=False)

            curr_dm_timestep = wrapped_env.step(single_action)

            # Every agent must be on its last step.
            assert (curr_dm_timestep.step_type is
                    dm_env.StepType.LAST), "Failed to update step type."

        helpers.assert_env_reset(wrapped_env, curr_dm_timestep, env_spec)

    assert (wrapped_env._reset_next_step is
            True), "Failed to set _reset_next_step correctly."
    assert (curr_dm_timestep.step_type is
            dm_env.StepType.LAST), "Failed to update step type."