Code Example #1
 def evaluate_human_model_pair(self, display=True, num_games=1):
     a0 = GreedyHumanModel(self.mlp)
     a1 = GreedyHumanModel(self.mlp)
     agent_pair = AgentPair(a0, a1)
     return self.evaluate_agent_pair(agent_pair,
                                     display=display,
                                     num_games=num_games)
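For context, a minimal invocation sketch of the method above (assuming the older-style AgentEvaluator constructor shown in Code Example #9; the layout name and horizon are illustrative):
 evaluator = AgentEvaluator({"layout_name": "cramped_room"}, {"horizon": 100})
 # Pairs two GreedyHumanModel agents and returns the evaluation results.
 results = evaluator.evaluate_human_model_pair(display=False, num_games=1)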
Code Example #2
 def setUp(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     self.mlp = MediumLevelPlanner.from_pickle_or_compute(
         self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True)
     self.env = OvercookedEnv(self.base_mdp, **DEFAULT_ENV_PARAMS)
     self.rnd_agent_pair = AgentPair(GreedyHumanModel(self.mlp),
                                     GreedyHumanModel(self.mlp))
     np.random.seed(0)
Code Example #3
 def evaluate_human_model_pair(self,
                               num_games=1,
                               display=False,
                               native_eval=False):
     a0 = GreedyHumanModel(self.env.mlam)
     a1 = GreedyHumanModel(self.env.mlam)
     agent_pair = AgentPair(a0, a1)
     return self.evaluate_agent_pair(agent_pair,
                                     num_games=num_games,
                                     display=display,
                                     native_eval=native_eval)
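Code Examples #1 and #3 reflect two generations of the planning API: the older evaluator hands GreedyHumanModel a MediumLevelPlanner (self.mlp), while the newer one uses the MediumLevelActionManager reached through the environment (self.env.mlam). A minimal construction sketch, assuming the imports and parameters shown in Code Examples #2 and #15:
 base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
 # Older API (Code Example #2): planner built directly from the MDP.
 mlp = MediumLevelPlanner.from_pickle_or_compute(base_mdp, NO_COUNTERS_PARAMS, force_compute=True)
 agent_old = GreedyHumanModel(mlp)
 # Newer API (Code Example #15): action manager, typically reached via env.mlam.
 mlam = MediumLevelActionManager.from_pickle_or_compute(base_mdp, NO_COUNTERS_PARAMS, force_compute=True)
 agent_new = GreedyHumanModel(mlam)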
Code Example #4
    def test_scenario_1_s(self):
        # Smaller version of the corridor collisions scenario above
        # to facilitate DRL training
        scenario_1_mdp = OvercookedGridworld.from_layout_name(
            'scenario1_s', start_order_list=['any'], cook_time=5)
        mlp = MediumLevelPlanner.from_pickle_or_compute(
            scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
        a0 = GreedyHumanModel(mlp)
        a1 = CoupledPlanningAgent(mlp)
        agent_pair = AgentPair(a0, a1)
        start_state = OvercookedState(
            [P((2, 1), s, Obj('onion', (2, 1))),
             P((4, 2), s)], {},
            order_list=['onion'])
        env = OvercookedEnv.from_mdp(scenario_1_mdp,
                                     start_state_fn=lambda: start_state)
        trajectory, time_taken_hr, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)
        env.reset()

        print("\n" * 5)
        print("-" * 50)

        a0 = CoupledPlanningAgent(mlp)
        a1 = CoupledPlanningAgent(mlp)
        agent_pair = AgentPair(a0, a1)
        trajectory, time_taken_rr, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)

        print("H+R time taken: ", time_taken_hr)
        print("R+R time taken: ", time_taken_rr)
        self.assertGreater(time_taken_hr, time_taken_rr)
Code Example #5
 def test_scenario_1(self):
     # Myopic corridor collision
     #
     # X X X X X O X D X X X X X
     # X   ↓Ho     X           X
     # X     X X X X X X X ↓R  X
     # X                       X
     # X S X X X X X X X X P P X
     #
     # H starts on the left with an onion, further away from the tunnel
     # entrance than R. The optimal planner tells R to go first and assumes
     # H will wait for R to pass. H, however, starts going through the tunnel
     # and they get stuck. The H plan is a bit extreme (it would probably
     # realize that it should retrace its steps at some point).
     scenario_1_mdp = OvercookedGridworld.from_layout_name(
         'small_corridor', start_order_list=['any'], cook_time=5)
     mlp = MediumLevelPlanner.from_pickle_or_compute(
         scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     a0 = GreedyHumanModel(mlp)
     a1 = CoupledPlanningAgent(mlp)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((2, 1), s, Obj('onion', (2, 1))),
          P((10, 2), s)], {},
         order_list=['onion'])
     env = OvercookedEnv.from_mdp(scenario_1_mdp,
                                  start_state_fn=lambda: start_state)
     env.run_agents(agent_pair, include_final_state=True, display=DISPLAY)
Code Example #6
 def test_two_greedy_human_open_map(self):
     scenario_2_mdp = OvercookedGridworld.from_layout_name('scenario2')
     mlam = MediumLevelActionManager.from_pickle_or_compute(
         scenario_2_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     a0 = GreedyHumanModel(mlam)
     a1 = GreedyHumanModel(mlam)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((8, 1), s), P((1, 1), s)], {},
         all_orders=scenario_2_mdp.start_all_orders)
     env = OvercookedEnv.from_mdp(scenario_2_mdp,
                                  start_state_fn=lambda: start_state,
                                  horizon=100)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
Code Example #7
    def test_pedagogical_ingredients_picking(self):
        mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages_tomato")
        mlam = MediumLevelActionManager.from_pickle_or_compute(mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
        agent = GreedyHumanModel(mlam, choose_ingredients_pedagogically=True)

        self.assertEqual(agent.next_ingredients([], ['tomato', 'tomato', 'tomato']), {"tomato"})
        self.assertEqual(agent.next_ingredients([], ['onion', 'onion', 'tomato']), {'tomato', 'onion'})
        self.assertEqual(agent.next_ingredients([], ['onion', 'onion', 'onion']), {"onion"})

        self.assertEqual(agent.next_ingredients(["onion"], ["onion", "onion", "tomato"]), {"tomato"})
        self.assertEqual(agent.next_ingredients(["onion"], ["onion", "onion", "onion"]), {"onion"})

        self.assertEqual(agent.next_ingredients(["tomato"], ["onion", "onion", "tomato"]), {"onion"})
        self.assertEqual(agent.next_ingredients(["tomato"], ["tomato", "tomato", "tomato"]), {"tomato"})
Code Example #8
 def test_two_greedy_human_open_map(self):
     scenario_2_mdp = OvercookedGridworld.from_layout_name(
         'scenario2', start_order_list=['any'], cook_time=5)
     mlp = MediumLevelPlanner.from_pickle_or_compute(
         scenario_2_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     a0 = GreedyHumanModel(mlp)
     a1 = GreedyHumanModel(mlp)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((8, 1), s), P((1, 1), s)], {},
         order_list=['onion'])
     env = OvercookedEnv.from_mdp(scenario_2_mdp,
                                  start_state_fn=lambda: start_state,
                                  horizon=100)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(len(end_state.order_list), 0)
Code Example #9
 def test_embedded_planning_agent(self):
     agent_evaluator = AgentEvaluator({"layout_name": "cramped_room"},
                                      {"horizon": 100})
     other_agent = GreedyHumanModel(agent_evaluator.mlp)
     epa = EmbeddedPlanningAgent(other_agent,
                                 agent_evaluator.mlp,
                                 agent_evaluator.env,
                                 delivery_horizon=1)
     ap = AgentPair(epa, other_agent)
     agent_evaluator.evaluate_agent_pair(ap, num_games=1, display=DISPLAY)
Code Example #10
    def repetative_runs(self, evaluator, num_games=10):
        trajectory_0 = evaluator.evaluate_human_model_pair(num_games=num_games,
                                                           native_eval=True)
        trajectory_1 = evaluator.evaluate_human_model_pair(num_games=num_games,
                                                           native_eval=True)

        h0 = GreedyHumanModel(evaluator.env.mlam)
        h1 = GreedyHumanModel(evaluator.env.mlam)
        ap_hh_2 = AgentPair(h0, h1)
        trajectory_2 = evaluator.evaluate_agent_pair(agent_pair=ap_hh_2,
                                                     num_games=num_games,
                                                     native_eval=True)

        h3 = GreedyHumanModel(evaluator.env.mlam)
        h4 = GreedyHumanModel(evaluator.env.mlam)
        ap_hh_3 = AgentPair(h3, h4)
        trajectory_3 = evaluator.evaluate_agent_pair(agent_pair=ap_hh_3,
                                                     num_games=num_games,
                                                     native_eval=True)
Code Example #11
    def test_agents_on_open_map(self):
        scenario_2_mdp = OvercookedGridworld.from_layout_name('scenario2')
        mlam = MediumLevelActionManager.from_pickle_or_compute(scenario_2_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
        agent_pairs = [
            AgentPair(GreedyHumanModel(mlam), GreedyHumanModel(mlam)),
            AgentPair(SimpleGreedyHumanModel(mlam), SimpleGreedyHumanModel(mlam)),
            AgentPair(RandomAgent(all_actions=True), RandomAgent(all_actions=True)),
            AgentPair(RandomAgent(all_actions=False), RandomAgent(all_actions=False))
        ]

        start_state = OvercookedState(
            [P((8, 1), s),
             P((1, 1), s)],
            {},
            all_orders=scenario_2_mdp.start_all_orders
        )
        for agent_pair in agent_pairs:
            env = OvercookedEnv.from_mdp(scenario_2_mdp, start_state_fn=lambda: start_state, horizon=100)
            trajectory, time_taken, _, _ = env.run_agents(agent_pair, include_final_state=True, display=DISPLAY)
Code Example #12
 def evaluate_one_optimal_one_greedy_human(self,
                                           num_games,
                                           h_idx=0,
                                           display=True):
     h = GreedyHumanModel(self.mlp)
     r = CoupledPlanningAgent(self.mlp)
     agent_pair = AgentPair(h, r) if h_idx == 0 else AgentPair(r, h)
     return self.evaluate_agent_pair(agent_pair,
                                     num_games=num_games,
                                     display=display)
Code Example #13
File: ppo.py  Project: Kennard123661/cs6244-project
def configure_other_agent(params, gym_env, mlp, mdp):
    if params["OTHER_AGENT_TYPE"] == "hm":
        hl_br, hl_temp, ll_br, ll_temp = params["HM_PARAMS"]
        agent = GreedyHumanModel(mlp, hl_boltzmann_rational=hl_br, hl_temp=hl_temp, ll_boltzmann_rational=ll_br,
                                 ll_temp=ll_temp)
        gym_env.use_action_method = True

    elif params["OTHER_AGENT_TYPE"][:2] == "bc":
        best_bc_model_paths = load_pickle(BEST_BC_MODELS_PATH)
        if params["OTHER_AGENT_TYPE"] == "bc_train":
            bc_model_path = best_bc_model_paths["train"][mdp.layout_name]
        elif params["OTHER_AGENT_TYPE"] == "bc_test":
            bc_model_path = best_bc_model_paths["test"][mdp.layout_name]
        else:
            raise ValueError("Other agent type must be bc train or bc test")

        print("LOADING BC MODEL FROM: {}".format(bc_model_path))
        agent, bc_params = get_bc_agent_from_saved(bc_model_path)
        gym_env.use_action_method = True
        # Make sure environment params are the same in PPO as in the BC model
        for k, v in bc_params["env_params"].items():
            assert v == params["env_params"][k], "{} did not match. env_params: {} \t PPO params: {}".format(k, v, params["env_params"][k])
        for k, v in bc_params["mdp_params"].items():
            assert v == params["mdp_params"][k], "{} did not match. mdp_params: {} \t PPO params: {}".format(k, v, params["mdp_params"][k])

    elif params["OTHER_AGENT_TYPE"] == "rnd":
        agent = RandomAgent()

    elif params["OTHER_AGENT_TYPE"] == "sp":
        gym_env.self_play_randomization = 1

    else:
        raise ValueError("unknown type of agent to match with")
        
    if params["OTHER_AGENT_TYPE"] != "sp":
        assert mlp.mdp == mdp
        agent.set_mdp(mdp)
        gym_env.other_agent = agent
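The "hm" branch above unpacks HM_PARAMS into Boltzmann-rationality settings for GreedyHumanModel. A hypothetical params fragment that would exercise that branch (keys taken from the code above; the values are illustrative):
 params = {
     "OTHER_AGENT_TYPE": "hm",
     # (hl_boltzmann_rational, hl_temp, ll_boltzmann_rational, ll_temp)
     "HM_PARAMS": [True, 1.0, False, 1.0],
 }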
Code Example #14
 def test_one_coupled_one_greedy_human(self):
     # Even though in the first ~10 timesteps it seems like agent 1 is wasting
     # time, this is actually not suboptimal: the true bottleneck is going to
     # be agent 0 later on (when it goes to get the 3rd onion).
     a0 = GreedyHumanModel(self.mlp_large)
     a1 = CoupledPlanningAgent(self.mlp_large)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((2, 1), s), P((1, 1), s)], {},
         order_list=['onion'])
     env = OvercookedEnv(large_mdp, start_state_fn=lambda: start_state)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(end_state.order_list, [])
Code Example #15
 def setUp(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     self.mlam = MediumLevelActionManager.from_pickle_or_compute(self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True)
     self.env = OvercookedEnv.from_mdp(self.base_mdp, **DEFAULT_ENV_PARAMS)
     self.greedy_human_model_pair = AgentPair(GreedyHumanModel(self.mlam), GreedyHumanModel(self.mlam))
     np.random.seed(0)
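A minimal sketch of how the fixtures above might be exercised in a test body, assuming run_agents behaves as in Code Examples #6 and #11:
 def test_greedy_human_model_pair_runs(self):
     # Roll out the two GreedyHumanModel agents in the cramped_room environment.
     trajectory, time_taken, _, _ = self.env.run_agents(
         self.greedy_human_model_pair, include_final_state=True, display=False)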