Example 1
    def check_joint_plan(self,
                         joint_motion_planner,
                         start,
                         goal,
                         times=None,
                         min_t=None,
                         display=False):
        """Runs the plan in the environment and checks that the intended goals are achieved."""
        debug = False
        action_plan, end_pos_and_orients, plan_lengths = joint_motion_planner.get_low_level_action_plan(
            start, goal)
        if debug:
            print("Start state: {}, Goal state: {}, Action plan: {}".format(
                start, goal, action_plan))

        start_state = OvercookedState(
            [P(*start[0]), P(*start[1])], {}, order_list=['any', 'any'])
        env = OvercookedEnv(joint_motion_planner.mdp, horizon=1000)
        resulting_state, _ = env.execute_plan(start_state,
                                              action_plan,
                                              display=display)

        self.assertTrue(
            any([
                agent_goal in resulting_state.players_pos_and_or
                for agent_goal in goal
            ]))
        self.assertEqual(resulting_state.players_pos_and_or,
                         end_pos_and_orients)
        self.assertEqual(len(action_plan), min(plan_lengths))

        if min_t is not None: self.assertEqual(len(action_plan), min_t)
        if times is not None: self.assertEqual(plan_lengths, times)
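These snippets are excerpted from the overcooked_ai test suite and omit their module-level setup. Below is a minimal sketch of the imports and shorthands the snippets appear to rely on; the module paths and aliases (P, Obj, the direction and action constants) are assumptions inferred from usage and may differ across library versions.

# Assumed setup for the snippets in this listing; paths and aliases may vary by version.
import numpy as np

from overcooked_ai_py.mdp.actions import Action, Direction
from overcooked_ai_py.mdp.overcooked_mdp import (
    OvercookedGridworld, OvercookedState, PlayerState as P, ObjectState as Obj)
from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv, DEFAULT_ENV_PARAMS
from overcooked_ai_py.planning.planners import (
    MediumLevelPlanner, MotionPlanner, NO_COUNTERS_PARAMS)
from overcooked_ai_py.agents.agent import (
    AgentPair, AgentGroup, FixedPlanAgent, GreedyHumanModel, CoupledPlanningAgent)

# Direction/action shorthands used throughout the tests (assumed):
n, s, e, w = Direction.NORTH, Direction.SOUTH, Direction.EAST, Direction.WEST
stay, interact = Action.STAY, Action.INTERACT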
Example 2
    def test_random_layout_feature_types(self):
        mandatory_features = {POT, DISH_DISPENSER, SERVING_LOC}
        optional_features = {ONION_DISPENSER, TOMATO_DISPENSER}
        optional_features_combinations = [{ONION_DISPENSER, TOMATO_DISPENSER}, {ONION_DISPENSER}, {TOMATO_DISPENSER}]

        for optional_features_combo in optional_features_combinations:
            left_out_optional_features = optional_features - optional_features_combo
            used_features = list(optional_features_combo | mandatory_features)
            mdp_gen_params = {"prop_feats": 0.9,
                              "feature_types": used_features,
                              "prop_empty": 0.1,
                              "inner_shape": (6, 5),
                              "display": False,
                              "start_all_orders": [
                                  {"ingredients": ["onion", "onion", "onion"]}
                              ]}
            mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(mdp_gen_params, outer_shape=(6, 5))
            env = OvercookedEnv(mdp_fn, **DEFAULT_ENV_PARAMS)
            for _ in range(10):
                env.reset()
                curr_terrain = env.mdp.terrain_mtx
                terrain_features = set.union(*(set(line) for line in curr_terrain))
                self.assertTrue(all(elem in terrain_features for elem in used_features)) # all used_features are actually used
                if left_out_optional_features:
                    self.assertFalse(any(elem in terrain_features for elem in left_out_optional_features)) # all left_out optional_features are not used
Example 3
    def __init__(self,
                 mdp_params,
                 env_params={},
                 mdp_fn_params=None,
                 force_compute=False,
                 mlp_params=NO_COUNTERS_PARAMS,
                 debug=False):
        """
        mdp_params (dict): params for creation of an OvercookedGridworld instance through the `from_layout_name` method
        env_params (dict): params for creation of an OvercookedEnv
        mdp_fn_params (dict): params to set up random MDP generation
        force_compute (bool): whether to re-compute the MediumLevelPlanner even if a matching file is found
        mlp_params (dict): params for the MediumLevelPlanner
        """
        assert type(mdp_params) is dict, "mdp_params must be a dictionary"

        if mdp_fn_params is None:
            self.variable_mdp = False
            self.mdp_fn = lambda: OvercookedGridworld.from_layout_name(
                **mdp_params)
        else:
            self.variable_mdp = True
            self.mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(
                mdp_params, **mdp_fn_params)

        self.env = OvercookedEnv(self.mdp_fn, **env_params)
        self.force_compute = force_compute
        self.debug = debug
        self.mlp_params = mlp_params
        self._mlp = None
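A minimal usage sketch for the constructor above, assuming it belongs to a planner-holding wrapper class; PlannerEnvWrapper is a placeholder name, and only the keyword structure is taken from the excerpt.

# Hypothetical instantiation; "PlannerEnvWrapper" and the layout name are placeholders.
wrapper = PlannerEnvWrapper(
    mdp_params={"layout_name": "cramped_room"},  # forwarded to OvercookedGridworld.from_layout_name
    env_params={"horizon": 400},                 # forwarded to OvercookedEnv
    force_compute=False,                         # reuse a cached MediumLevelPlanner if one exists
)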
Example 4
 def test_scenario_1(self):
     # Myopic corridor collision
     #
     # X X X X X O X D X X X X X
     # X   ↓Ho     X           X
     # X     X X X X X X X ↓R  X
     # X                       X
     # X S X X X X X X X X P P X
     #
     # H is on the left with an onion, further from the tunnel entrance than R.
     # The optimal planner tells R to go first, expecting that H will wait
     # for R to pass. H, however, starts going through the tunnel
     # and they get stuck. The H plan is a bit extreme (it would probably
     # realize that it should retrace its steps at some point).
     scenario_1_mdp = OvercookedGridworld.from_layout_name(
         'small_corridor', start_order_list=['any'], cook_time=5)
     mlp = MediumLevelPlanner.from_pickle_or_compute(
         scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     a0 = GreedyHumanModel(mlp)
     a1 = CoupledPlanningAgent(mlp)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((2, 1), s, Obj('onion', (2, 1))),
          P((10, 2), s)], {},
         order_list=['onion'])
     env = OvercookedEnv(scenario_1_mdp, start_state_fn=lambda: start_state)
     env.run_agents(agent_pair, include_final_state=True, display=DISPLAY)
Example 5
class TestFeaturizations(unittest.TestCase):
    def setUp(self):
        self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
        self.mlp = MediumLevelPlanner.from_pickle_or_compute(
            self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True)
        self.env = OvercookedEnv(self.base_mdp, **DEFAULT_ENV_PARAMS)
        self.rnd_agent_pair = AgentPair(GreedyHumanModel(self.mlp),
                                        GreedyHumanModel(self.mlp))
        np.random.seed(0)

    def test_lossless_state_featurization(self):
        trajs = self.env.get_rollouts(self.rnd_agent_pair, num_games=5)
        featurized_observations = [[
            self.base_mdp.lossless_state_encoding(state) for state in ep_states
        ] for ep_states in trajs["ep_observations"]]
        expected_featurization = load_pickle(
            "data/testing/lossless_state_featurization")
        self.assertTrue(
            np.array_equal(expected_featurization, featurized_observations))

    def test_state_featurization(self):
        trajs = self.env.get_rollouts(self.rnd_agent_pair, num_games=5)
        featurized_observations = [[
            self.base_mdp.featurize_state(state, self.mlp)
            for state in ep_states
        ] for ep_states in trajs["ep_observations"]]
        expected_featurization = load_pickle(
            "data/testing/state_featurization")
        self.assertTrue(
            np.array_equal(expected_featurization, featurized_observations))
Example 6
 def test_one_player_env(self):
     mdp = OvercookedGridworld.from_layout_name("cramped_room_single")
     env = OvercookedEnv(mdp, horizon=12)
     a0 = FixedPlanAgent([stay, w, w, e, e, n, e, interact, w, n, interact])
     ag = AgentGroup(a0)
     env.run_agents(ag, display=False)
     self.assertEqual(env.state.players_pos_and_or, (((2, 1), (0, -1)), ))
Example 7
    def _check_trajectories_dynamics(trajectories):
        for idx in range(len(trajectories["ep_observations"])):
            states = trajectories["ep_observations"][idx]
            actions = trajectories["ep_actions"][idx]
            rewards = trajectories["ep_rewards"][idx]
            mdp_params = trajectories["mdp_params"][idx]
            env_params = trajectories["env_params"][idx]

            assert len(states) == len(actions) == len(rewards), \
                "# states {}\t# actions {}\t# rewards {}".format(
                    len(states), len(actions), len(rewards))

            # Checking that actions would give rise to same behaviour in current MDP
            simulation_env = OvercookedEnv(
                OvercookedGridworld.from_layout_name(**mdp_params),
                **env_params)
            for i in range(len(states) - 1):
                curr_state = states[i]
                simulation_env.state = curr_state

                next_state, reward, done, info = simulation_env.step(
                    actions[i])

                assert states[i + 1] == next_state, \
                    "States differed (expected vs actual): {}".format(
                        simulation_env.display_states(states[i + 1], next_state))
                assert rewards[i] == reward, "{} \t {}".format(
                    rewards[i], reward)
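The helper above consumes the trajectory dictionary produced by OvercookedEnv.get_rollouts (the keys ep_observations, ep_actions, ep_rewards, mdp_params, and env_params all appear in the excerpt). A minimal wiring sketch follows, assuming the helper is reachable as a static method on its enclosing test class (TestTrajectories is purely a placeholder name).

# Sketch only: agents, layout, and the enclosing class name are illustrative.
mdp = OvercookedGridworld.from_layout_name("cramped_room")
mlp = MediumLevelPlanner.from_pickle_or_compute(
    mdp, NO_COUNTERS_PARAMS, force_compute=False)
env = OvercookedEnv(mdp, horizon=100)
agent_pair = AgentPair(GreedyHumanModel(mlp), GreedyHumanModel(mlp))
trajectories = env.get_rollouts(agent_pair, num_games=2)
TestTrajectories._check_trajectories_dynamics(trajectories)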
Example 8
    def check_single_motion_plan(self,
                                 motion_planner,
                                 start_pos_and_or,
                                 goal_pos_and_or,
                                 expected_length=None):
        dummy_agent = P((3, 2), n)
        start_state = OvercookedState([P(*start_pos_and_or), dummy_agent], {},
                                      order_list=['any', 'any'])
        action_plan, pos_and_or_plan, plan_cost = motion_planner.get_plan(
            start_pos_and_or, goal_pos_and_or)

        # Checking that last state obtained matches goal position
        self.assertEqual(pos_and_or_plan[-1], goal_pos_and_or)

        # In single motion plans the graph cost should be equal to
        # the plan cost (= plan length) as agents should never STAY
        graph_plan_cost = sum(
            [motion_planner._graph_action_cost(a) for a in action_plan])
        self.assertEqual(plan_cost, graph_plan_cost)

        joint_action_plan = [(a, stay) for a in action_plan]
        env = OvercookedEnv(motion_planner.mdp, horizon=1000)
        resulting_state, _ = env.execute_plan(start_state, joint_action_plan)
        self.assertEqual(resulting_state.players_pos_and_or[0],
                         goal_pos_and_or)

        if expected_length is not None:
            self.assertEqual(len(action_plan), expected_length)
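A hypothetical test method (not in the excerpt) sketching how check_single_motion_plan might be invoked; the MotionPlanner.from_pickle_or_compute call mirrors Example 24, and the layout, coordinates, and empty counter_goals list are illustrative assumptions.

    def test_motion_plan_to_pot(self):
        # Hypothetical sketch: layout and coordinates are illustrative.
        mdp = OvercookedGridworld.from_layout_name("cramped_room")
        motion_planner = MotionPlanner.from_pickle_or_compute(
            mdp, counter_goals=[])
        # In cramped_room, (2, 1) facing north faces the pot, which makes it a
        # plausible motion goal; (1, 2) facing south is an arbitrary start.
        self.check_single_motion_plan(motion_planner,
                                      start_pos_and_or=((1, 2), s),
                                      goal_pos_and_or=((2, 1), n),
                                      expected_length=None)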
Example 9
 def test_display_phi(self):
     mdp0 = OvercookedGridworld.from_layout_name("cramped_room")
     mdp_fn = lambda _ignored: mdp0
     env = OvercookedEnv(mdp_fn, horizon=20)
     env.get_rollouts(self.rnd_agent_pair,
                      1,
                      display=True,
                      display_phi=True)
Example 10
 def setUp(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     self.mlp = MediumLevelPlanner.from_pickle_or_compute(
         self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True)
     self.env = OvercookedEnv(self.base_mdp, **DEFAULT_ENV_PARAMS)
     self.rnd_agent_pair = AgentPair(GreedyHumanModel(self.mlp),
                                     GreedyHumanModel(self.mlp))
     np.random.seed(0)
Example 11
    def test_constructor(self):
        try:
            OvercookedEnv(self.base_mdp, horizon=10)
        except Exception as e:
            self.fail("Failed to instantiate OvercookedEnv:\n{}".format(e))

        with self.assertRaises(TypeError):
            OvercookedEnv(self.base_mdp, **{"invalid_env_param": None})
Example 12
    def test_multiple_mdp_env(self):
        mdp0 = OvercookedGridworld.from_layout_name("cramped_room")
        mdp1 = OvercookedGridworld.from_layout_name("counter_circuit")
        mdp_fn = lambda: np.random.choice([mdp0, mdp1])

        # Default env
        env = OvercookedEnv(mdp_fn, horizon=100)
        env.get_rollouts(self.rnd_agent_pair, 5)
Example 13
 def test_one_coupled_one_fixed(self):
     a0 = CoupledPlanningAgent(self.mlp_large)
     a1 = FixedPlanAgent([s, e, n, w])
     agent_pair = AgentPair(a0, a1)
     env = OvercookedEnv(large_mdp, horizon=10)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     self.assertEqual(time_taken, 10)
Example 14
 def test_starting_obj_randomization(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     start_state_fn = self.base_mdp.get_random_start_state_fn(
         random_start_pos=False, rnd_obj_prob_thresh=0.8)
     env = OvercookedEnv(self.base_mdp, start_state_fn)
     start_objects = env.state.all_objects_list
     for _ in range(3):
         env.reset()
         print(env)
         curr_objects = env.state.all_objects_list
         self.assertFalse(np.array_equal(start_objects, curr_objects))
Example 15
 def test_starting_position_randomization(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("simple")
     start_state_fn = self.base_mdp.get_random_start_state_fn(
         random_start_pos=True, rnd_obj_prob_thresh=0.0)
     env = OvercookedEnv(self.base_mdp, start_state_fn)
     start_positions = env.state.players_pos_and_or
     for _ in range(3):
         env.reset()
         print(env)
         curr_positions = env.state.players_pos_and_or
         self.assertFalse(np.array_equal(start_positions, curr_positions))
Example 16
 def test_fixed_plan_agents(self):
     a0 = FixedPlanAgent([s, e, n, w])
     a1 = FixedPlanAgent([s, w, n, e])
     agent_pair = AgentPair(a0, a1)
     env = OvercookedEnv(large_mdp, horizon=10)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(time_taken, 10)
     self.assertEqual(env.mdp.get_standard_start_state().player_positions,
                      end_state.player_positions)
Example 17
 def check_full_plan(self, start_state, planner, debug=False):
     heuristic = Heuristic(planner.mp)
     joint_action_plan = planner.get_low_level_action_plan(
         start_state,
         heuristic.simple_heuristic,
         debug=debug,
         goal_info=debug)
     env = OvercookedEnv(planner.mdp, horizon=1000)
     resulting_state, _ = env.execute_plan(start_state,
                                           joint_action_plan,
                                           display=False)
     self.assertEqual(len(resulting_state.order_list), 0)
Example 18
 def test_two_coupled_agents(self):
     a0 = CoupledPlanningAgent(self.mlp_large)
     a1 = CoupledPlanningAgent(self.mlp_large)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((2, 2), n), P((2, 1), n)], {}, order_list=['any'])
     env = OvercookedEnv(large_mdp, start_state_fn=lambda: start_state)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(end_state.order_list, [])
Example 19
    def test_random_layout(self):
        mdp_gen_params = {"prop_feats": (1, 1)}
        mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(**mdp_gen_params)
        env = OvercookedEnv(mdp=mdp_fn, **DEFAULT_ENV_PARAMS)
        start_terrain = env.mdp.terrain_mtx

        for _ in range(3):
            env.reset()
            print(env)
            curr_terrain = env.mdp.terrain_mtx
            self.assertFalse(np.array_equal(start_terrain, curr_terrain))

        mdp_gen_params = {
            "mdp_choices": ['cramped_room', 'asymmetric_advantages']
        }
        mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(**mdp_gen_params)
        env = OvercookedEnv(mdp=mdp_fn, **DEFAULT_ENV_PARAMS)

        layouts_seen = []
        for _ in range(10):
            layouts_seen.append(env.mdp.terrain_mtx)
            env.reset()
        all_same_layout = all([
            np.array_equal(env.mdp.terrain_mtx, terrain)
            for terrain in layouts_seen
        ])
        self.assertFalse(all_same_layout)
Example 20
 def test_two_coupled_agents_coupled_pair(self):
     mlp_simple = MediumLevelPlanner.from_pickle_or_compute(
         simple_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     cp_agent = CoupledPlanningAgent(mlp_simple)
     agent_pair = CoupledPlanningPair(cp_agent)
     start_state = OvercookedState(
         [P((2, 2), n), P((2, 1), n)], {}, order_list=['any'])
     env = OvercookedEnv(simple_mdp, start_state_fn=lambda: start_state)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(end_state.order_list, [])
Example 21
    def test_slowed_down_agent(self):
        def should_stop(step_num, stop_every_n_steps):
            # currently SlowedDownAgent always stops at the 2nd step
            return not bool((step_num - 1) % stop_every_n_steps)

        horizon = 100
        #NOTE: if stop_every_n_steps is 3 this would not work because of rounding error
        #   (ok for practical purposes, will just skip turn later but would fail test below)
        for stop_every_n_steps in [2, 4]:
            slowdown_rate = 1 - 1/stop_every_n_steps
            
            agent_pair = AgentPair(
                SlowedDownAgent(RandomAgent(), slowdown_rate),
                SlowedDownAgent(RandomAgent(), slowdown_rate),
            )
            skip_action_probs = SlowedDownAgent(RandomAgent()).skip_action[1]["action_probs"].tolist()
            env = OvercookedEnv.from_mdp(large_mdp, horizon=horizon)
            trajectory, time_taken, _, _ = env.run_agents(agent_pair, include_final_state=True, display=DISPLAY)

            for i, traj_step in enumerate(trajectory):
                (s_t, a_t, r_t, done, info) = traj_step 
                if not done:
                    agent_0_probs = info["agent_infos"][0]["action_probs"]
                    agent_1_probs = info["agent_infos"][1]["action_probs"]
                    if should_stop(i, stop_every_n_steps):
                        self.assertEqual(agent_0_probs.tolist(), skip_action_probs)
                        self.assertEqual(agent_1_probs.tolist(), skip_action_probs)
                    else:
                        self.assertNotEqual(agent_0_probs.tolist(), skip_action_probs)
                        self.assertNotEqual(agent_1_probs.tolist(), skip_action_probs)
Example 22
 def test_one_coupled_one_greedy_human(self):
     # Even though in the first ~10 timesteps it seems like agent 1 is wasting time
     # it turns out that this is actually not suboptimal as the true bottleneck is
     # going to be agent 0 later on (when it goes to get the 3rd onion)
     a0 = GreedyHumanModel(self.mlp_large)
     a1 = CoupledPlanningAgent(self.mlp_large)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((2, 1), s), P((1, 1), s)], {}, order_list=['onion'])
     env = OvercookedEnv(large_mdp, start_state_fn=lambda: start_state)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(end_state.order_list, [])
Example 23
    def test_transitions_and_environment(self):
        bad_state = OvercookedState([P((0, 0), s), P((3, 1), s)], {},
                                    order_list=[])

        with self.assertRaises(AssertionError):
            self.base_mdp.get_state_transition(bad_state, stay)

        env = OvercookedEnv(self.base_mdp)
        env.state.order_list = ['onion', 'any']

        def check_transition(action, expected_state, expected_reward=0):
            state = env.state
            pred_state, sparse_reward, dense_reward = self.base_mdp.get_state_transition(
                state, action)
            self.assertEqual(
                pred_state, expected_state,
                '\n' + str(pred_state) + '\n' + str(expected_state))
            new_state, sparse_reward, _, _ = env.step(action)
            self.assertEqual(new_state, expected_state)
            self.assertEqual(sparse_reward, expected_reward)

        check_transition(
            [n, e],
            OvercookedState([P((1, 1), n), P((3, 1), e)], {},
                            order_list=['onion', 'any']))
Example 24
    def activate(self):
        super(OvercookedGame, self).activate()

        # Sanity check at start of each game
        if not self.npc_players.union(self.human_players) == set(self.players):
            raise ValueError("Inconsistent State")

        self.curr_layout = self.layouts.pop()
        mdp = OvercookedGridworld.from_layout_name(self.curr_layout,
                                                   **self.mdp_params)
        self.env = OvercookedEnv.from_mdp(mdp)
        if self.show_potential:
            self.mp = MotionPlanner.from_pickle_or_compute(
                self.mdp, counter_goals=NO_COUNTERS_PARAMS)

        if self.show_potential:
            self.phi = self.mdp.potential_function(self.state,
                                                   self.mp,
                                                   gamma=0.99)
        self.start_time = time()
        self.curr_tick = 0
        self.score = 0
        self.threads = []
        for npc_policy in self.npc_policies:
            self.npc_policies[npc_policy].reset()
            self.npc_state_queues[npc_policy].put(self.state)
            t = Thread(target=self.npc_policy_consumer, args=(npc_policy, ))
            self.threads.append(t)
            t.start()
Example 25
    def test_scenario_1_s(self):
        # Smaller version of the corridor collisions scenario above
        # to facilitate DRL training
        scenario_1_mdp = OvercookedGridworld.from_layout_name(
            'scenario1_s', start_order_list=['any'], cook_time=5)
        mlp = MediumLevelPlanner.from_pickle_or_compute(
            scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
        a0 = GreedyHumanModel(mlp)
        a1 = CoupledPlanningAgent(mlp)
        agent_pair = AgentPair(a0, a1)
        start_state = OvercookedState(
            [P((2, 1), s, Obj('onion', (2, 1))),
             P((4, 2), s)], {},
            order_list=['onion'])
        env = OvercookedEnv.from_mdp(scenario_1_mdp,
                                     start_state_fn=lambda: start_state)
        trajectory, time_taken_hr, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)
        env.reset()

        print("\n" * 5)
        print("-" * 50)

        a0 = CoupledPlanningAgent(mlp)
        a1 = CoupledPlanningAgent(mlp)
        agent_pair = AgentPair(a0, a1)
        trajectory, time_taken_rr, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)

        print("H+R time taken: ", time_taken_hr)
        print("R+R time taken: ", time_taken_rr)
        self.assertGreater(time_taken_hr, time_taken_rr)
Example 26
 def test_get_encoding_function(self):
     mdp = OvercookedGridworld.from_layout_name("cramped_room")
     mdp_params = mdp.mdp_params
     env_params = {"horizon": 100}
     env = OvercookedEnv.from_mdp(mdp, **env_params)
     state = mdp.get_standard_start_state()
     example_encoding_fns_names = ["mdp.multi_hot_orders_encoding", "env.featurize_state_mdp", "env.lossless_state_encoding_mdp"]
     example_encoding_fns = [mdp.multi_hot_orders_encoding, env.featurize_state_mdp, env.lossless_state_encoding_mdp]
     for encoding_fn_name, encoding_fn in zip(example_encoding_fns_names, example_encoding_fns):
         encoding_fn_from_name = get_encoding_function(encoding_fn_name, env=env)
         self.assertEqual(encoding_fn_from_name, encoding_fn)
         if encoding_fn_name.split(".")[0] == "mdp":
             encoding_fn_from_name = get_encoding_function(encoding_fn_name, mdp=mdp)
             self.assertEqual(encoding_fn_from_name, encoding_fn)
             encoding_fn_from_name = get_encoding_function(encoding_fn_name, mdp_params=mdp_params)
             # compare names as new instance of mdp is created
             self.assertEqual(encoding_fn_from_name.__name__, encoding_fn.__name__)
         else:
             encoding_fn_from_name = get_encoding_function(encoding_fn_name, env_params=env_params, mdp_params=mdp_params)
             # compare names as new instance of env is created
             self.assertEqual(encoding_fn_from_name.__name__, encoding_fn.__name__) 
     
     expected_encoded_state_dict = {str(i): fn(state) for i, fn in enumerate(example_encoding_fns)}
     actual_encoded_state_dict = get_encoding_function({str(i): fn_name for i, fn_name in enumerate(example_encoding_fns_names)}, env=env)(state)
     self.assertEqual(expected_encoded_state_dict.keys(), actual_encoded_state_dict.keys())
     for k in expected_encoded_state_dict.keys():
         self.assertTrue(np.array_equal(expected_encoded_state_dict[k], actual_encoded_state_dict[k]))
Example 27
 def test_four_player_env_fixed(self):
     mdp = OvercookedGridworld.from_layout_name("multiplayer_schelling")
     assert mdp.num_players == 4
     env = OvercookedEnv(mdp, horizon=16)
     a0 = FixedPlanAgent([stay, w, w])
     a1 = FixedPlanAgent([
         stay, stay, e, e, n, n, n, e, interact, n, n, w, w, w, n, interact,
         e
     ])
     a2 = FixedPlanAgent(
         [stay, w, interact, n, n, e, e, e, n, e, n, interact, w])
     a3 = FixedPlanAgent([e, interact, n, n, w, w, w, n, interact, e, s])
     ag = AgentGroup(a0, a1, a2, a3)
     env.run_agents(ag, display=False)
     self.assertEqual(env.state.players_pos_and_or,
                      (((1, 1), (-1, 0)), ((3, 1), (0, -1)),
                       ((2, 1), (-1, 0)), ((4, 2), (0, 1))))
Example 28
 def mdps_and_envs_from_trajectories(trajectories):
     mdps, envs = [], []
     for idx in range(len(trajectories["ep_lengths"])):
         mdp_params, env_params = trajectories["mdp_params"][idx], trajectories["env_params"][idx]
         mdp = OvercookedGridworld.from_layout_name(**mdp_params)
         env = OvercookedEnv(mdp, **env_params)
         mdps.append(mdp)
         envs.append(env)
     return mdps, envs
Example 29
def init_gym_env(bc_params):
    env_setup_params = copy.deepcopy(bc_params)
    del env_setup_params["data_params"]  # Not necessary for setting up env
    mdp = OvercookedGridworld.from_layout_name(**bc_params["mdp_params"])
    env = OvercookedEnv(mdp, **bc_params["env_params"])
    gym_env = gym.make("Overcooked-v0")
    
    mlp = MediumLevelPlanner.from_pickle_or_compute(mdp, NO_COUNTERS_PARAMS, force_compute=False)
    gym_env.custom_init(env, featurize_fn=lambda x: mdp.featurize_state(x, mlp))
    return gym_env
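init_gym_env only reads mdp_params and env_params from bc_params and deletes data_params, so a minimal call might look like the sketch below; the concrete values are illustrative, and gym plus the Overcooked-v0 registration are assumed to be importable.

# Sketch only: parameter values are illustrative.
bc_params = {
    "data_params": {},                              # present but unused by init_gym_env
    "mdp_params": {"layout_name": "cramped_room"},  # forwarded to OvercookedGridworld.from_layout_name
    "env_params": {"horizon": 400},                 # forwarded to OvercookedEnv
}
gym_env = init_gym_env(bc_params)
obs = gym_env.reset()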
Example 30
 def __init__(self,
              env_params,
              mdp_fn,
              force_compute=False,
              mlam_params=NO_COUNTERS_PARAMS,
              debug=False):
     """
     env_params (dict): params for creation of an OvercookedEnv
      mdp_fn (callable): a function that can be used to create an mdp
      force_compute (bool): whether to re-compute the MediumLevelActionManager even if a matching file is found
     mlam_params (dict): the parameters for mlam, the MediumLevelActionManager
     debug (bool): whether to display debugging information on init
     """
     assert callable(
         mdp_fn), "mdp generating function must be a callable function"
     env_params["mlam_params"] = mlam_params
     self.mdp_fn = mdp_fn
     self.env = OvercookedEnv(self.mdp_fn, **env_params)
     self.force_compute = force_compute
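A minimal instantiation sketch for the constructor above; MLAMWrapper is a placeholder class name, and the fixed-layout mdp_fn mirrors the lambda pattern from Example 9.

# Hypothetical instantiation; "MLAMWrapper" and the layout name are placeholders.
mdp = OvercookedGridworld.from_layout_name("cramped_room")
wrapper = MLAMWrapper(
    env_params={"horizon": 400},   # mlam_params is injected by __init__ before OvercookedEnv is created
    mdp_fn=lambda _ignored: mdp,   # any callable that returns an OvercookedGridworld
    force_compute=False,
)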