Example 1
    def test_constructor(self):
        try:
            OvercookedEnv.from_mdp(self.base_mdp, horizon=10)
        except Exception as e:
            self.fail("Failed to instantiate OvercookedEnv:\n{}".format(e))

        with self.assertRaises(TypeError):
            OvercookedEnv.from_mdp(self.base_mdp, **{"invalid_env_param": None})
Example 2
    def activate(self):
        super(OvercookedGame, self).activate()

        # Sanity check at start of each game
        if not self.npc_players.union(self.human_players) == set(self.players):
            raise ValueError("Inconsistent State")

        self.curr_layout = self.layouts.pop()
        mdp = OvercookedGridworld.from_layout_name(self.curr_layout,
                                                   **self.mdp_params)
        self.env = OvercookedEnv.from_mdp(mdp)
        if self.show_potential:
            self.mp = MotionPlanner.from_pickle_or_compute(
                self.mdp, counter_goals=NO_COUNTERS_PARAMS)

        if self.show_potential:
            self.phi = self.mdp.potential_function(self.state,
                                                   self.mp,
                                                   gamma=0.99)
        self.start_time = time()
        self.curr_tick = 0
        self.score = 0
        self.threads = []
        for npc_policy in self.npc_policies:
            self.npc_policies[npc_policy].reset()
            self.npc_state_queues[npc_policy].put(self.state)
            t = Thread(target=self.npc_policy_consumer, args=(npc_policy, ))
            self.threads.append(t)
            t.start()
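The threads started above hand each new state to an NPC policy through npc_state_queues and run self.npc_policy_consumer, which is not shown in this example. The following is only a plausible sketch of such a consumer loop, inferred from how activate() wires things up; the activity flag and the enqueue_action sink are assumptions, not the repository's actual implementation.

    # Plausible sketch (not the repository's code) of the NPC consumer loop started above.
    def npc_policy_consumer(self, policy_id):
        queue = self.npc_state_queues[policy_id]
        while self._is_active:  # assumed activity flag
            state = queue.get()  # block until a new state is published
            npc_action, _ = self.npc_policies[policy_id].action(state)
            self.enqueue_action(policy_id, npc_action)  # assumed action sink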
Example 3
    def __init__(self,
                 mdp_params,
                 env_params={},
                 mdp_fn_params=None,
                 force_compute=False,
                 mlp_params=NO_COUNTERS_PARAMS,
                 debug=False):
        """
        mdp_params (dict): params for creation of an OvercookedGridworld instance through the `from_layout_name` method
        env_params (dict): params for creation of an OvercookedEnv
        mdp_fn_params (dict): params to set up random MDP generation
        force_compute (bool): whether to re-compute the MediumLevelPlanner even if a matching file is found
        mlp_params (dict): params for MediumLevelPlanner
        """
        assert type(mdp_params) is dict, "mdp_params must be a dictionary"

        if mdp_fn_params is None:
            mdp = OvercookedGridworld.from_layout_name(**mdp_params)
            self.mdp_fn = lambda: mdp
            self.env = OvercookedEnv.from_mdp(mdp, **env_params)
        else:
            self.mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(
                mdp_params, **mdp_fn_params)
            self.env = OvercookedEnv(self.mdp_fn, **env_params)

        self.force_compute = force_compute
        self.debug = debug
        self.mlp_params = mlp_params
        self._mlp = None
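Since only the constructor is shown, a short usage sketch of the two construction paths described in the docstring may help; the enclosing class name (AgentEvaluator) and its import path are assumptions based on the overcooked_ai codebase, and the parameter values are placeholders.

    # Hypothetical usage of the constructor above; class name, import path and
    # parameter values are assumptions / placeholders.
    from overcooked_ai_py.agents.benchmarking import AgentEvaluator

    # Fixed-layout path: mdp_fn_params is None, so the MDP is built once via
    # OvercookedGridworld.from_layout_name(**mdp_params) and wrapped in an OvercookedEnv.
    evaluator = AgentEvaluator(mdp_params={"layout_name": "cramped_room"},
                               env_params={"horizon": 100})

    # Passing a non-None mdp_fn_params dict instead switches to random MDP generation
    # through LayoutGenerator.mdp_gen_fn_from_dict(mdp_params, **mdp_fn_params).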
Example 4
    def test_scenario_1_s(self):
        # Smaller version of the corridor collisions scenario above
        # to facilitate DRL training
        scenario_1_mdp = OvercookedGridworld.from_layout_name(
            'scenario1_s', start_order_list=['any'], cook_time=5)
        mlp = MediumLevelPlanner.from_pickle_or_compute(
            scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
        a0 = GreedyHumanModel(mlp)
        a1 = CoupledPlanningAgent(mlp)
        agent_pair = AgentPair(a0, a1)
        start_state = OvercookedState(
            [P((2, 1), s, Obj('onion', (2, 1))),
             P((4, 2), s)], {},
            order_list=['onion'])
        env = OvercookedEnv.from_mdp(scenario_1_mdp,
                                     start_state_fn=lambda: start_state)
        trajectory, time_taken_hr, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)
        env.reset()

        print("\n" * 5)
        print("-" * 50)

        a0 = CoupledPlanningAgent(mlp)
        a1 = CoupledPlanningAgent(mlp)
        agent_pair = AgentPair(a0, a1)
        trajectory, time_taken_rr, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)

        print("H+R time taken: ", time_taken_hr)
        print("R+R time taken: ", time_taken_rr)
        self.assertGreater(time_taken_hr, time_taken_rr)
Example 5
    def check_single_motion_plan(self,
                                 motion_planner,
                                 start_pos_and_or,
                                 goal_pos_and_or,
                                 expected_length=None):
        dummy_agent = P((3, 2), n)
        start_state = OvercookedState([P(*start_pos_and_or), dummy_agent], {},
                                      all_orders=simple_mdp.start_all_orders)
        action_plan, pos_and_or_plan, plan_cost = motion_planner.get_plan(
            start_pos_and_or, goal_pos_and_or)

        # Checking that last state obtained matches goal position
        self.assertEqual(pos_and_or_plan[-1], goal_pos_and_or)

        # In single motion plans the graph cost should be equal to
        # the plan cost (= plan length) as agents should never STAY
        graph_plan_cost = sum(
            [motion_planner._graph_action_cost(a) for a in action_plan])
        self.assertEqual(plan_cost, graph_plan_cost)

        joint_action_plan = [(a, stay) for a in action_plan]
        env = OvercookedEnv.from_mdp(motion_planner.mdp, horizon=1000)
        resulting_state, _ = env.execute_plan(start_state, joint_action_plan)
        self.assertEqual(resulting_state.players_pos_and_or[0],
                         goal_pos_and_or)

        if expected_length is not None:
            self.assertEqual(len(action_plan), expected_length)
Example 6
    def check_joint_plan(self,
                         joint_motion_planner,
                         start,
                         goal,
                         times=None,
                         min_t=None,
                         display=False):
        """Runs the plan in the environment and checks that the intended goals are achieved."""
        debug = False
        action_plan, end_pos_and_orients, plan_lengths = joint_motion_planner.get_low_level_action_plan(
            start, goal)
        if debug:
            print("Start state: {}, Goal state: {}, Action plan: {}".format(
                start, goal, action_plan))

        start_state = OvercookedState(
            [P(*start[0]), P(*start[1])], {},
            all_orders=simple_mdp.start_all_orders)
        env = OvercookedEnv.from_mdp(joint_motion_planner.mdp, horizon=1000)
        resulting_state, _ = env.execute_plan(start_state,
                                              action_plan,
                                              display=display)

        self.assertTrue(
            any([
                agent_goal in resulting_state.players_pos_and_or
                for agent_goal in goal
            ]))
        self.assertEqual(resulting_state.players_pos_and_or,
                         end_pos_and_orients)
        self.assertEqual(len(action_plan), min(plan_lengths))

        if min_t is not None: self.assertEqual(len(action_plan), min_t)
        if times is not None: self.assertEqual(plan_lengths, times)
Example 7
    def test_slowed_down_agent(self):
        def should_stop(step_num, stop_every_n_steps):
            # currently SlowedDownAgent always stops at the 2nd step
            return not bool((step_num - 1) % stop_every_n_steps)

        horizon = 100
        # NOTE: if stop_every_n_steps were 3 this would not work because of rounding error
        #   (OK for practical purposes: the agent would just skip a turn later, but the test below would fail)
        for stop_every_n_steps in [2, 4]:
            slowdown_rate = 1 - 1/stop_every_n_steps
            
            agent_pair = AgentPair(
                SlowedDownAgent(RandomAgent(), slowdown_rate), 
                SlowedDownAgent(RandomAgent(), slowdown_rate)
                )
            skip_action_probs = SlowedDownAgent(RandomAgent()).skip_action[1]["action_probs"].tolist()
            env = OvercookedEnv.from_mdp(large_mdp, horizon=horizon)
            trajectory, time_taken, _, _ = env.run_agents(agent_pair, include_final_state=True, display=DISPLAY)

            for i, traj_step in enumerate(trajectory):
                (s_t, a_t, r_t, done, info) = traj_step 
                if not done:
                    agent_0_probs = info["agent_infos"][0]["action_probs"]
                    agent_1_probs = info["agent_infos"][1]["action_probs"]
                    if should_stop(i, stop_every_n_steps):
                        self.assertEqual(agent_0_probs.tolist(), skip_action_probs)
                        self.assertEqual(agent_1_probs.tolist(), skip_action_probs)
                    else:
                        self.assertNotEqual(agent_0_probs.tolist(), skip_action_probs)
                        self.assertNotEqual(agent_1_probs.tolist(), skip_action_probs)
Example 8
 def test_scenario_1(self):
     # Myopic corridor collision
     #
     # X X X X X O X D X X X X X
     # X   ↓Ho     X           X
     # X     X X X X X X X ↓R  X
     # X                       X
     # X S X X X X X X X X P P X
     #
     # H on left with onion, further away to the tunnel entrance than R.
     # Optimal planner tells R to go first and that H will wait
     # for R to pass. H however, starts going through the tunnel
     # and they get stuck. The H plan is a bit extreme (it would probably
     # realize that it should retrace its steps at some point)
     scenario_1_mdp = OvercookedGridworld.from_layout_name(
         'small_corridor', start_order_list=['any'], cook_time=5)
     mlp = MediumLevelPlanner.from_pickle_or_compute(
         scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     a0 = GreedyHumanModel(mlp)
     a1 = CoupledPlanningAgent(mlp)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((2, 1), s, Obj('onion', (2, 1))),
          P((10, 2), s)], {},
         order_list=['onion'])
     env = OvercookedEnv.from_mdp(scenario_1_mdp,
                                  start_state_fn=lambda: start_state)
     env.run_agents(agent_pair, include_final_state=True, display=DISPLAY)
Example 9
 def test_one_player_env(self):
     mdp = OvercookedGridworld.from_layout_name("cramped_room_single")
     env = OvercookedEnv.from_mdp(mdp, horizon=12)
     a0 = FixedPlanAgent([stay, w, w, e, e, n, e, interact, w, n, interact])
     ag = AgentGroup(a0)
     env.run_agents(ag, display=False)
     self.assertEqual(env.state.players_pos_and_or, (((2, 1), (0, -1)), ))
Example 10
    def test_transitions_and_environment(self):
        bad_state = OvercookedState([P((0, 0), s), P((3, 1), s)], {},
                                    order_list=[])

        with self.assertRaises(AssertionError):
            self.base_mdp.get_state_transition(bad_state, stay)

        env = OvercookedEnv.from_mdp(self.base_mdp)
        env.state.order_list = ['onion', 'any']

        def check_transition(action, expected_state, expected_reward=0):
            state = env.state
            pred_state, sparse_reward, dense_reward, _ = self.base_mdp.get_state_transition(
                state, action)
            self.assertEqual(
                pred_state, expected_state,
                '\n' + str(pred_state) + '\n' + str(expected_state))
            new_state, sparse_reward, _, _ = env.step(action)
            self.assertEqual(new_state, expected_state)
            self.assertEqual(sparse_reward, expected_reward)

        check_transition([n, e],
                         OvercookedState([P(
                             (1, 1), n), P((3, 1), e)], {},
                                         order_list=['onion', 'any']))
Example 11
 def test_get_encoding_function(self):
     mdp = OvercookedGridworld.from_layout_name("cramped_room")
     mdp_params = mdp.mdp_params
     env_params = {"horizon": 100}
     env = OvercookedEnv.from_mdp(mdp, **env_params)
     state = mdp.get_standard_start_state()
     example_encoding_fns_names = ["mdp.multi_hot_orders_encoding", "env.featurize_state_mdp", "env.lossless_state_encoding_mdp"]
     example_encoding_fns = [mdp.multi_hot_orders_encoding, env.featurize_state_mdp, env.lossless_state_encoding_mdp]
     for encoding_fn_name, encoding_fn in zip(example_encoding_fns_names, example_encoding_fns):
         encoding_fn_from_name = get_encoding_function(encoding_fn_name, env=env)
         self.assertEqual(encoding_fn_from_name, encoding_fn)
         if encoding_fn_name.split(".")[0] == "mdp":
             encoding_fn_from_name = get_encoding_function(encoding_fn_name, mdp=mdp)
             self.assertEqual(encoding_fn_from_name, encoding_fn)
             encoding_fn_from_name = get_encoding_function(encoding_fn_name, mdp_params=mdp_params)
             # compare names as new instance of mdp is created
             self.assertEqual(encoding_fn_from_name.__name__, encoding_fn.__name__)
         else:
             encoding_fn_from_name = get_encoding_function(encoding_fn_name, env_params=env_params, mdp_params=mdp_params)
             # compare names as new instance of env is created
             self.assertEqual(encoding_fn_from_name.__name__, encoding_fn.__name__) 
     
     expected_encoded_state_dict = {str(i): fn(state) for i, fn in enumerate(example_encoding_fns)}
     actual_encoded_state_dict = get_encoding_function({str(i): fn_name for i, fn_name in enumerate(example_encoding_fns_names)}, env=env)(state)
     self.assertEqual(expected_encoded_state_dict.keys(), actual_encoded_state_dict.keys())
     for k in expected_encoded_state_dict.keys():
         self.assertTrue(np.array_equal(expected_encoded_state_dict[k], actual_encoded_state_dict[k]))
Example 12
 def setUp(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     self.mlam = MediumLevelActionManager.from_pickle_or_compute(
         self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True)
     self.env = OvercookedEnv.from_mdp(self.base_mdp, **DEFAULT_ENV_PARAMS)
     self.rnd_agent_pair = AgentPair(GreedyHumanModel(self.mlam),
                                     GreedyHumanModel(self.mlam))
     np.random.seed(0)
Example 13
 def test_one_coupled_one_fixed(self):
     a0 = CoupledPlanningAgent(self.mlp_large)
     a1 = FixedPlanAgent([s, e, n, w])
     agent_pair = AgentPair(a0, a1)
     env = OvercookedEnv.from_mdp(large_mdp, horizon=10)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     self.assertEqual(time_taken, 10)
Example 14
 def test_starting_obj_randomization(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     start_state_fn = self.base_mdp.get_random_start_state_fn(random_start_pos=False, rnd_obj_prob_thresh=0.8)
     env = OvercookedEnv.from_mdp(self.base_mdp, start_state_fn)
     start_state = env.state.all_objects_list
     for _ in range(3):
         env.reset()
         curr_terrain = env.state.all_objects_list
         self.assertFalse(np.array_equal(start_state, curr_terrain))
Example 15
 def test_fixed_plan_agents(self):
     a0 = FixedPlanAgent([s, e, n, w])
     a1 = FixedPlanAgent([s, w, n, e])
     agent_pair = AgentPair(a0, a1)
     env = OvercookedEnv.from_mdp(large_mdp, horizon=10)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair, include_final_state=True, display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(time_taken, 10)
     self.assertEqual(env.mdp.get_standard_start_state().player_positions, end_state.player_positions)
Example 16
 def get_mdps_and_envs_from_trajectories(trajectories):
     mdps, envs = [], []
     for idx in range(len(trajectories["ep_lengths"])):
         mdp_params = copy.deepcopy(trajectories["mdp_params"][idx])
         env_params = copy.deepcopy(trajectories["env_params"][idx])
         mdp = OvercookedGridworld.from_layout_name(**mdp_params)
         env = OvercookedEnv.from_mdp(mdp, **env_params)
         mdps.append(mdp)
         envs.append(env)
     return mdps, envs
Example 17
 def test_starting_position_randomization(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     start_state_fn = self.base_mdp.get_random_start_state_fn(
         random_start_pos=True, rnd_obj_prob_thresh=0.0)
     env = OvercookedEnv.from_mdp(self.base_mdp, start_state_fn)
     start_state = env.state.players_pos_and_or
     for _ in range(3):
         env.reset()
         print(env)
         curr_terrain = env.state.players_pos_and_or
         self.assertFalse(np.array_equal(start_state, curr_terrain))
Example 18
 def test_four_player_env_fixed(self):
     mdp = OvercookedGridworld.from_layout_name("multiplayer_schelling")
     assert mdp.num_players == 4
     env = OvercookedEnv.from_mdp(mdp, horizon=16)
     a0 = FixedPlanAgent([stay, w, w])
     a1 = FixedPlanAgent([stay, stay, e, e, n, n, n, e, interact, n, n, w, w, w, n, interact, e])
     a2 = FixedPlanAgent([stay, w, interact, n, n, e, e, e, n, e, n, interact, w])
     a3 = FixedPlanAgent([e, interact, n, n, w, w, w, n, interact, e, s])
     ag = AgentGroup(a0, a1, a2, a3)
     env.run_agents(ag, display=False)
     self.assertEqual(
         env.state.players_pos_and_or,
         (((1, 1), (-1, 0)), ((3, 1), (0, -1)), ((2, 1), (-1, 0)), ((4, 2), (0, 1)))
     )
Example 19
 def test_two_coupled_agents(self):
     a0 = CoupledPlanningAgent(self.mlp_large)
     a1 = CoupledPlanningAgent(self.mlp_large)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState([P(
         (2, 2), n), P((2, 1), n)], {},
                                   order_list=['any'])
     env = OvercookedEnv.from_mdp(large_mdp,
                                  start_state_fn=lambda: start_state)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(end_state.order_list, [])
Example 20
def get_overcooked_obj_attr(attr,
                            env=None,
                            mdp=None,
                            env_params=None,
                            mdp_params=None):
    """
    Returns an Overcooked object attribute based on its name; used mostly to get state-processing
    (encoding) functions and gym spaces.
    When it receives a string, it parses it to get the attribute; the format is "env"/"mdp" + "." + attribute name,
    e.g. "env.lossless_state_encoding_mdp".
    When it receives a dict, string values are replaced with the corresponding object attributes.
    When it receives a method/function, it returns it unchanged; this obviously does not work that way if attr is a str/dict.
    """
    attr_type = type(attr)
    if attr_type is str:
        name = attr
        [obj_name, attr_name] = name.split(".")
        if obj_name == "mdp":
            if not mdp:
                if env:
                    mdp = env.mdp
                else:
                    mdp = OvercookedGridworld(**mdp_params)
            attr = getattr(mdp, attr_name)
        elif obj_name == "env":
            if not env:
                if not mdp:
                    mdp = OvercookedGridworld(**mdp_params)
                env_params = only_valid_named_args(env_params,
                                                   OvercookedEnv.from_mdp)
                env = OvercookedEnv.from_mdp(mdp, **env_params)
            attr = getattr(env, attr_name)
        # not tested or used anywhere yet
        # elif obj_name in kwargs:
        #     attr = getattr(kwargs[obj_name], attr_name)
        else:
            raise ValueError("Unsupported obj attr string " + name)
    elif attr_type is dict:
        attr = {
            k: get_overcooked_obj_attr(v,
                                       env=env,
                                       mdp=mdp,
                                       env_params=env_params,
                                       mdp_params=mdp_params)
            for k, v in attr.items()
        }
    # not tested or used anywhere yet
    # elif attr_type in [list, tuple]:
    #     attr = attr_type(get_overcooked_obj_attr(elem, env=env, mdp=mdp, env_params=env_params,
    #         mdp_params=mdp_params) for elem in attr)
    return attr
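To make the string and dict resolution described above concrete, here is a small usage sketch; it is illustrative only and relies solely on names that already appear in this and the surrounding examples.

    # Illustrative usage of get_overcooked_obj_attr (a sketch, not part of the original module).
    mdp = OvercookedGridworld.from_layout_name("cramped_room")
    env = OvercookedEnv.from_mdp(mdp, horizon=100)

    # String form: "env"/"mdp" + "." + attribute name
    encode_fn = get_overcooked_obj_attr("env.lossless_state_encoding_mdp", env=env)
    assert encode_fn == env.lossless_state_encoding_mdp

    # Dict form: every string value is replaced by the resolved attribute
    fns = get_overcooked_obj_attr({"featurize": "env.featurize_state_mdp",
                                   "orders": "mdp.multi_hot_orders_encoding"}, env=env)

    # Anything else (e.g. an already-bound method) is returned unchanged
    same_fn = get_overcooked_obj_attr(env.featurize_state_mdp, env=env)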
Example 21
 def test_two_coupled_agents_coupled_pair(self):
     mlp_simple = MediumLevelPlanner.from_pickle_or_compute(
         simple_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     cp_agent = CoupledPlanningAgent(mlp_simple)
     agent_pair = CoupledPlanningPair(cp_agent)
     start_state = OvercookedState([P(
         (2, 2), n), P((2, 1), n)], {},
                                   order_list=['any'])
     env = OvercookedEnv.from_mdp(simple_mdp,
                                  start_state_fn=lambda: start_state)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(end_state.order_list, [])
Example 22
 def test_two_greedy_human_open_map(self):
     scenario_2_mdp = OvercookedGridworld.from_layout_name('scenario2')
     mlam = MediumLevelActionManager.from_pickle_or_compute(
         scenario_2_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     a0 = GreedyHumanModel(mlam)
     a1 = GreedyHumanModel(mlam)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((8, 1), s), P((1, 1), s)], {},
         all_orders=scenario_2_mdp.start_all_orders)
     env = OvercookedEnv.from_mdp(scenario_2_mdp,
                                  start_state_fn=lambda: start_state,
                                  horizon=100)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
Example 23
 def test_one_coupled_one_greedy_human(self):
     # Even though in the first ~10 timesteps it seems like agent 1 is wasting time
     # it turns out that this is actually not suboptimal as the true bottleneck is
     # going to be agent 0 later on (when it goes to get the 3rd onion)
     a0 = GreedyHumanModel(self.mlp_large)
     a1 = CoupledPlanningAgent(self.mlp_large)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState([P(
         (2, 1), s), P((1, 1), s)], {},
                                   order_list=['onion'])
     env = OvercookedEnv.from_mdp(large_mdp,
                                  start_state_fn=lambda: start_state)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(end_state.order_list, [])
Example 24
    def test_agents_on_open_map(self):
        scenario_2_mdp = OvercookedGridworld.from_layout_name('scenario2')
        mlam = MediumLevelActionManager.from_pickle_or_compute(scenario_2_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
        agent_pairs = [
            AgentPair(GreedyHumanModel(mlam), GreedyHumanModel(mlam)),
            AgentPair(SimpleGreedyHumanModel(mlam), SimpleGreedyHumanModel(mlam)),
            AgentPair(RandomAgent(all_actions=True), RandomAgent(all_actions=True)),
            AgentPair(RandomAgent(all_actions=False), RandomAgent(all_actions=False))
        ]

        start_state = OvercookedState(
            [P((8, 1), s),
             P((1, 1), s)],
            {},
            all_orders=scenario_2_mdp.start_all_orders
        )
        for agent_pair in agent_pairs:
            env = OvercookedEnv.from_mdp(scenario_2_mdp, start_state_fn=lambda: start_state, horizon=100)
            trajectory, time_taken, _, _ = env.run_agents(agent_pair, include_final_state=True, display=DISPLAY)
Example 25
 def test_two_greedy_human_open_map(self):
     scenario_2_mdp = OvercookedGridworld.from_layout_name(
         'scenario2', start_order_list=['any'], cook_time=5)
     mlp = MediumLevelPlanner.from_pickle_or_compute(
         scenario_2_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     a0 = GreedyHumanModel(mlp)
     a1 = GreedyHumanModel(mlp)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState([P(
         (8, 1), s), P((1, 1), s)], {},
                                   order_list=['onion'])
     env = OvercookedEnv.from_mdp(scenario_2_mdp,
                                  start_state_fn=lambda: start_state,
                                  horizon=100)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(len(end_state.order_list), 0)
Example 26
    def test_transitions_and_environment(self):
        bad_state = OvercookedState([P((0, 0), s), P((3, 1), s)], {})

        with self.assertRaises(AssertionError):
            self.base_mdp.get_state_transition(bad_state, stay)

        env = OvercookedEnv.from_mdp(self.base_mdp)

        def check_transition(action, expected_path, recompute=False):
            # Compute actual values
            state = env.state
            pred_state, _ = self.base_mdp.get_state_transition(state, action)
            new_state, sparse_reward, _, _ = env.step(action)
            self.assertEqual(pred_state, new_state,
                             '\n' + str(pred_state) + '\n' + str(new_state))

            # Recompute expected values if desired
            if recompute:
                actual = {
                    "state": pred_state.to_dict(),
                    "reward": sparse_reward
                }
                save_as_json(actual, expected_path)

            # Compute expected values
            expected = load_from_json(expected_path)
            expected_state = OvercookedState.from_dict(expected['state'])
            expected_reward = expected['reward']

            # Make sure everything lines up (note __eq__ is transitive)
            self.assertTrue(
                pred_state.time_independent_equal(expected_state),
                '\n' + str(pred_state) + '\n' + str(expected_state))
            self.assertEqual(sparse_reward, expected_reward)

        expected_path = os.path.join(TESTING_DATA_DIR,
                                     "test_transitions_and_environments",
                                     "expected.json")

        # NOTE: set 'recompute=True' if deliberately updating state dynamics
        check_transition([n, e], expected_path, recompute=False)
Example 27
    def setUp(self):
        if not os.path.exists(self.temp_dir):
            os.makedirs(self.temp_dir)

        self.base_mdp = OvercookedGridworld.from_layout_name(self.layout_name)
        self.mlam = MediumLevelActionManager.from_pickle_or_compute(
            self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True, info=False)
        self.env = OvercookedEnv.from_mdp(self.base_mdp,
                                          horizon=self.horizon,
                                          info_level=0)
        self.starting_state_dict = self.base_mdp.get_standard_start_state(
        ).to_dict()

        outfile = process_human_trials_main(self.infile,
                                            self.temp_dir,
                                            insert_interacts=True,
                                            verbose=False,
                                            forward_port=False,
                                            fix_json=False)
        with open(outfile, 'rb') as f:
            self.human_data = pickle.load(f)[self.layout_name]
Example 28
 def test_get_gym_space(self):
     mdp = OvercookedGridworld.from_layout_name("cramped_room")
     mdp_params = mdp.mdp_params
     env_params = {"horizon": 100}
     env = OvercookedEnv.from_mdp(mdp, **env_params)
     example_gym_space_names = ["mdp.multi_hot_orders_encoding_gym_space", "mdp.featurize_state_gym_space", 
         "mdp.lossless_state_encoding_gym_space"]
     example_gym_spaces = [mdp.multi_hot_orders_encoding_gym_space, mdp.featurize_state_gym_space, 
         mdp.lossless_state_encoding_gym_space]
     for space_name, space in zip(example_gym_space_names, example_gym_spaces):
         space_from_name = get_gym_space(space_name, env=env)
         self.assertEqual(space_from_name, space)
         if space_name.split(".")[0] == "mdp":
             space_from_name = get_gym_space(space_name, mdp=mdp)
             self.assertEqual(space_from_name, space)
             space_from_name = get_gym_space(space_name, mdp_params=mdp_params)
             self.assertEqual(space_from_name, space)
         else:
             space_from_name = get_gym_space(space_name, env_params=env_params, mdp_params=mdp_params)
             self.assertEqual(space_from_name, space)
     expected_space = gym.spaces.Dict({str(i): space for i,space in enumerate(example_gym_spaces)})
     actual_space = get_gym_space({str(i): space_name for i, space_name in enumerate(example_gym_space_names)}, env=env)
     self.assertEqual(expected_space, actual_space)
Example 29
from stable_baselines3 import HER, DDPG, DQN, SAC, TD3
from stable_baselines3.her.goal_selection_strategy import GoalSelectionStrategy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env.obs_dict_wrapper import ObsDictWrapper
from stable_baselines3.common.monitor import Monitor
import stable_baselines3.common.env_checker

from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

import gym
import numpy as np

from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld
from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv

model_class = TD3  # works also with SAC and DDPG

mdp = OvercookedGridworld.from_layout_name("cramped_room_single")
base_env = OvercookedEnv.from_mdp(mdp, horizon=1e4)
env = gym.make('Overcooked-single-v0')
env.custom_init(base_env, base_env.lossless_state_encoding_mdp_single)
env = Monitor(env, "./her_overcooked/", allow_early_resets=True)

# Available strategies (cf paper): future, final, episode
goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE

# If True the HER transitions will get sampled online
online_sampling = True
# Time limit for the episodes
max_episode_length = 50

action_noise = NormalActionNoise(mean=np.zeros(1), sigma=0.3 * np.ones(1))
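The snippet stops after defining the action noise; a minimal sketch of how the model could be assembled with the pre-1.1 stable_baselines3 HER wrapper is shown below. The policy name, hyperparameters and training budget are illustrative assumptions, not values from the original script.

# Illustrative sketch (assumed values) completing the setup above with the
# pre-1.1 stable_baselines3 HER wrapper API.
model = HER('MlpPolicy', env, model_class,
            n_sampled_goal=4,
            goal_selection_strategy=goal_selection_strategy,
            online_sampling=online_sampling,
            max_episode_length=max_episode_length,
            action_noise=action_noise,
            verbose=1)

model.learn(int(1e5))  # placeholder timestep budget
model.save("./her_overcooked/her_model")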
Example 30
 def setUp(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     self.env = OvercookedEnv.from_mdp(self.base_mdp, **DEFAULT_ENV_PARAMS)
     self.rnd_agent_pair = AgentPair(FixedPlanAgent([]), FixedPlanAgent([]))
     np.random.seed(0)