def _check_trajectories_dynamics(trajectories): for idx in range(len(trajectories["ep_observations"])): states, actions, rewards = trajectories["ep_observations"][ idx], trajectories["ep_actions"][idx], trajectories[ "ep_rewards"][idx] mdp_params, env_params = trajectories["mdp_params"][ idx], trajectories["env_params"][idx] assert len(states) == len(actions) == len( rewards), "# states {}\t# actions {}\t# rewards {}".format( len(states), len(actions), len(rewards)) # Checking that actions would give rise to same behaviour in current MDP simulation_env = OvercookedEnv( OvercookedGridworld.from_layout_name(**mdp_params), **env_params) for i in range(len(states) - 1): curr_state = states[i] simulation_env.state = curr_state next_state, reward, done, info = simulation_env.step( actions[i]) assert states[ i + 1] == next_state, "States differed (expected vs actual): {}".format( simulation_env.display_states(states[i + 1], next_state)) assert rewards[i] == reward, "{} \t {}".format( rewards[i], reward)
class TestOvercookedEnvironment(unittest.TestCase):
    """Tests for ``OvercookedEnv``: construction, stepping, rollouts,
    multi-agent runs and start-state / layout randomization."""

    def setUp(self):
        """Create a cramped_room MDP, an env over it, and a fixed-plan agent
        pair; seed NumPy's global RNG so random draws repeat across runs."""
        self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
        self.env = OvercookedEnv(self.base_mdp, **DEFAULT_ENV_PARAMS)
        self.rnd_agent_pair = AgentPair(FixedPlanAgent([stay, w, w]),
                                        FixedPlanAgent([stay, e, e]))
        np.random.seed(0)

    def test_constructor(self):
        """A valid keyword constructs; an unknown keyword raises TypeError."""
        try:
            OvercookedEnv(self.base_mdp, horizon=10)
        except Exception as e:
            self.fail("Failed to instantiate OvercookedEnv:\n{}".format(e))

        with self.assertRaises(TypeError):
            OvercookedEnv(self.base_mdp, **{"invalid_env_param": None})

    def test_step_fn(self):
        """Stepping with ten random joint actions must not raise."""
        for _ in range(10):
            joint_action = random_joint_action()
            self.env.step(joint_action)

    def test_execute_plan(self):
        """Executing a ten-step random plan from the standard start state
        must not raise."""
        action_plan = [random_joint_action() for _ in range(10)]
        self.env.execute_plan(self.base_mdp.get_standard_start_state(),
                              action_plan)

    def test_run_agents(self):
        """Running the agent pair leaves the env in a state that differs
        from the one captured beforehand."""
        start_state = self.env.state
        self.env.run_agents(self.rnd_agent_pair)
        self.assertNotEqual(self.env.state, start_state)

    def test_rollouts(self):
        """Collecting three rollouts with the agent pair must not raise."""
        try:
            self.env.get_rollouts(self.rnd_agent_pair, 3)
        except Exception as e:
            self.fail("Failed to get rollouts from environment:\n{}".format(e))

    def test_one_player_env(self):
        """A single fixed-plan agent ends with the expected
        ``players_pos_and_or`` on the single-player layout."""
        mdp = OvercookedGridworld.from_layout_name("cramped_room_single")
        env = OvercookedEnv(mdp, horizon=12)
        a0 = FixedPlanAgent(
            [stay, w, w, e, e, n, e, interact, w, n, interact])
        ag = AgentGroup(a0)
        env.run_agents(ag, display=False)
        self.assertEqual(env.state.players_pos_and_or,
                         (((2, 1), (0, -1)), ))

    def test_four_player_env_fixed(self):
        """Four fixed-plan agents end with the expected
        ``players_pos_and_or`` on the four-player layout."""
        mdp = OvercookedGridworld.from_layout_name("multiplayer_schelling")
        # Use a TestCase assertion rather than a bare `assert`, which is
        # stripped under `python -O` and reports no values on failure.
        self.assertEqual(mdp.num_players, 4)
        env = OvercookedEnv(mdp, horizon=16)
        a0 = FixedPlanAgent([stay, w, w])
        a1 = FixedPlanAgent([
            stay, stay, e, e, n, n, n, e, interact, n, n, w, w, w, n,
            interact, e
        ])
        a2 = FixedPlanAgent(
            [stay, w, interact, n, n, e, e, e, n, e, n, interact, w])
        a3 = FixedPlanAgent(
            [e, interact, n, n, w, w, w, n, interact, e, s])
        ag = AgentGroup(a0, a1, a2, a3)
        env.run_agents(ag, display=False)
        self.assertEqual(env.state.players_pos_and_or,
                         (((1, 1), (-1, 0)), ((3, 1), (0, -1)),
                          ((2, 1), (-1, 0)), ((4, 2), (0, 1))))

    def test_multiple_mdp_env(self):
        """An env built from a callable that picks between two MDPs can
        produce rollouts without raising."""
        mdp0 = OvercookedGridworld.from_layout_name("cramped_room")
        mdp1 = OvercookedGridworld.from_layout_name("counter_circuit")

        def mdp_fn():
            return np.random.choice([mdp0, mdp1])

        # Default env
        env = OvercookedEnv(mdp_fn, horizon=100)
        env.get_rollouts(self.rnd_agent_pair, 5)

    def test_starting_position_randomization(self):
        """With random start positions enabled, each of three resets yields
        a ``players_pos_and_or`` different from the initial one."""
        self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
        start_state_fn = self.base_mdp.get_random_start_state_fn(
            random_start_pos=True, rnd_obj_prob_thresh=0.0)
        env = OvercookedEnv(self.base_mdp, start_state_fn)
        start_positions = env.state.players_pos_and_or
        for _ in range(3):
            env.reset()
            print(env)
            curr_positions = env.state.players_pos_and_or
            self.assertFalse(np.array_equal(start_positions, curr_positions))

    def test_starting_obj_randomization(self):
        """With ``rnd_obj_prob_thresh=0.8``, each of three resets yields an
        ``all_objects_list`` different from the initial one."""
        self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
        start_state_fn = self.base_mdp.get_random_start_state_fn(
            random_start_pos=False, rnd_obj_prob_thresh=0.8)
        env = OvercookedEnv(self.base_mdp, start_state_fn)
        start_objects = env.state.all_objects_list
        for _ in range(3):
            env.reset()
            print(env)
            curr_objects = env.state.all_objects_list
            self.assertFalse(np.array_equal(start_objects, curr_objects))

    def test_failing_rnd_layout(self):
        """Unknown MDP-generation parameters must surface as a TypeError
        while building the generator function or the env."""
        mdp_gen_params = {"None": None}
        with self.assertRaises(TypeError):
            mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(**mdp_gen_params)
            OvercookedEnv(mdp=mdp_fn, **DEFAULT_ENV_PARAMS)

    def test_random_layout(self):
        """Generated layouts change on reset, and sampling from a list of
        named layouts does not always return the same terrain."""
        mdp_gen_params = {"prop_feats": (1, 1)}
        mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(**mdp_gen_params)
        env = OvercookedEnv(mdp=mdp_fn, **DEFAULT_ENV_PARAMS)
        start_terrain = env.mdp.terrain_mtx

        for _ in range(3):
            env.reset()
            print(env)
            curr_terrain = env.mdp.terrain_mtx
            self.assertFalse(np.array_equal(start_terrain, curr_terrain))

        mdp_gen_params = {
            "mdp_choices": ['cramped_room', 'asymmetric_advantages']
        }
        mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(**mdp_gen_params)
        env = OvercookedEnv(mdp=mdp_fn, **DEFAULT_ENV_PARAMS)

        # Record the terrain before each of ten resets, then compare every
        # recorded terrain against the terrain left after the final reset.
        layouts_seen = []
        for _ in range(10):
            layouts_seen.append(env.mdp.terrain_mtx)
            env.reset()
        final_terrain = env.mdp.terrain_mtx
        all_same_layout = all(
            np.array_equal(final_terrain, terrain)
            for terrain in layouts_seen)
        self.assertFalse(all_same_layout)