def evaluate_human_model_pair(self, display=True, num_games=1):
    """Play two greedy human models against each other and return the evaluation."""
    pair = AgentPair(GreedyHumanModel(self.mlp), GreedyHumanModel(self.mlp))
    return self.evaluate_agent_pair(pair, display=display, num_games=num_games)
def setUp(self):
    """Build the cramped_room MDP, a freshly computed planner, an env, and a greedy pair."""
    np.random.seed(0)  # deterministic runs across test invocations
    self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
    # force_compute=True: always recompute the planner rather than loading a pickle
    self.mlp = MediumLevelPlanner.from_pickle_or_compute(
        self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True
    )
    self.env = OvercookedEnv(self.base_mdp, **DEFAULT_ENV_PARAMS)
    self.rnd_agent_pair = AgentPair(
        GreedyHumanModel(self.mlp), GreedyHumanModel(self.mlp)
    )
def evaluate_human_model_pair(self, num_games=1, display=False, native_eval=False):
    """Evaluate a pair of greedy human models built from this evaluator's env."""
    human_a = GreedyHumanModel(self.env.mlam)
    human_b = GreedyHumanModel(self.env.mlam)
    return self.evaluate_agent_pair(
        AgentPair(human_a, human_b),
        num_games=num_games,
        display=display,
        native_eval=native_eval,
    )
def test_scenario_1_s(self):
    # Smaller version of the corridor-collision scenario, sized down
    # to facilitate DRL training.
    mdp = OvercookedGridworld.from_layout_name(
        'scenario1_s', start_order_list=['any'], cook_time=5)
    planner = MediumLevelPlanner.from_pickle_or_compute(
        mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)

    start_state = OvercookedState(
        [P((2, 1), s, Obj('onion', (2, 1))), P((4, 2), s)],
        {}, order_list=['onion'])
    env = OvercookedEnv.from_mdp(mdp, start_state_fn=lambda: start_state)

    # Human model paired with a planning robot.
    hr_pair = AgentPair(GreedyHumanModel(planner), CoupledPlanningAgent(planner))
    trajectory, time_taken_hr, _, _ = env.run_agents(
        hr_pair, include_final_state=True, display=DISPLAY)

    env.reset()
    print("\n" * 5)
    print("-" * 50)

    # Two planning robots for comparison.
    rr_pair = AgentPair(CoupledPlanningAgent(planner), CoupledPlanningAgent(planner))
    trajectory, time_taken_rr, _, _ = env.run_agents(
        rr_pair, include_final_state=True, display=DISPLAY)

    print("H+R time taken: ", time_taken_hr)
    print("R+R time taken: ", time_taken_rr)
    # The robot-robot team should finish strictly faster than human-robot.
    self.assertGreater(time_taken_hr, time_taken_rr)
def test_scenario_1(self):
    # Myopic corridor collision. Rough layout sketch (small_corridor);
    # spacing approximate:
    #
    #   X X X X X O X D X X X X X
    #   X      ↓Ho             X
    #   X      X X X X X X ↓R  X
    #   X                      X
    #   X S X X X X X X X X P P X
    #
    # H starts on the left holding an onion, further from the tunnel
    # entrance than R. The optimal joint plan has R go first while H
    # waits for R to pass. The greedy human instead starts through the
    # tunnel and the two agents get stuck. (The H plan is a bit extreme;
    # a real human would probably retrace their steps at some point.)
    mdp = OvercookedGridworld.from_layout_name(
        'small_corridor', start_order_list=['any'], cook_time=5)
    planner = MediumLevelPlanner.from_pickle_or_compute(
        mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
    pair = AgentPair(GreedyHumanModel(planner), CoupledPlanningAgent(planner))
    start_state = OvercookedState(
        [P((2, 1), s, Obj('onion', (2, 1))), P((10, 2), s)],
        {}, order_list=['onion'])
    env = OvercookedEnv.from_mdp(mdp, start_state_fn=lambda: start_state)
    env.run_agents(pair, include_final_state=True, display=DISPLAY)
def test_two_greedy_human_open_map(self):
    """Two greedy human models run to completion on the open scenario2 map."""
    mdp = OvercookedGridworld.from_layout_name('scenario2')
    mlam = MediumLevelActionManager.from_pickle_or_compute(
        mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
    pair = AgentPair(GreedyHumanModel(mlam), GreedyHumanModel(mlam))
    start_state = OvercookedState(
        [P((8, 1), s), P((1, 1), s)],
        {},
        all_orders=mdp.start_all_orders)
    env = OvercookedEnv.from_mdp(
        mdp, start_state_fn=lambda: start_state, horizon=100)
    trajectory, time_taken, _, _ = env.run_agents(
        pair, include_final_state=True, display=DISPLAY)
def test_pedagogical_ingredients_picking(self):
    """Pedagogical greedy human picks the ingredient(s) that best signal the recipe."""
    mdp = OvercookedGridworld.from_layout_name("asymmetric_advantages_tomato")
    mlam = MediumLevelActionManager.from_pickle_or_compute(
        mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
    agent = GreedyHumanModel(mlam, choose_ingredients_pedagogically=True)

    # (ingredients already in pot, target recipe, expected next choices)
    cases = [
        ([], ['tomato', 'tomato', 'tomato'], {"tomato"}),
        ([], ['onion', 'onion', 'tomato'], {'tomato', 'onion'}),
        ([], ['onion', 'onion', 'onion'], {"onion"}),
        (["onion"], ["onion", "onion", "tomato"], {"tomato"}),
        (["onion"], ["onion", "onion", "onion"], {"onion"}),
        (["tomato"], ["onion", "onion", "tomato"], {"onion"}),
        (["tomato"], ["tomato", "tomato", "tomato"], {"tomato"}),
    ]
    for in_pot, recipe, expected in cases:
        self.assertEqual(agent.next_ingredients(in_pot, recipe), expected)
def test_two_greedy_human_open_map(self):
    """Two greedy humans on scenario2 should clear the single onion order within 100 steps."""
    mdp = OvercookedGridworld.from_layout_name(
        'scenario2', start_order_list=['any'], cook_time=5)
    planner = MediumLevelPlanner.from_pickle_or_compute(
        mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
    pair = AgentPair(GreedyHumanModel(planner), GreedyHumanModel(planner))
    start_state = OvercookedState(
        [P((8, 1), s), P((1, 1), s)],
        {},
        order_list=['onion'])
    env = OvercookedEnv.from_mdp(
        mdp, start_state_fn=lambda: start_state, horizon=100)
    trajectory, time_taken, _, _ = env.run_agents(
        pair, include_final_state=True, display=DISPLAY)
    final_state = trajectory[-1][0]
    # All orders should have been fulfilled by the end of the run.
    self.assertEqual(len(final_state.order_list), 0)
def test_embedded_planning_agent(self):
    """An EmbeddedPlanningAgent paired with a greedy human completes one game."""
    evaluator = AgentEvaluator({"layout_name": "cramped_room"}, {"horizon": 100})
    partner = GreedyHumanModel(evaluator.mlp)
    planner_agent = EmbeddedPlanningAgent(
        partner, evaluator.mlp, evaluator.env, delivery_horizon=1)
    evaluator.evaluate_agent_pair(
        AgentPair(planner_agent, partner), num_games=1, display=DISPLAY)
def repetative_runs(self, evaluator, num_games=10):
    """Run several greedy-human-pair evaluations back to back on the same evaluator.

    NOTE(review): the four trajectories are collected but never compared here;
    presumably repeatability is inspected elsewhere (or by eye) -- confirm intent.
    """
    trajectory_0 = evaluator.evaluate_human_model_pair(
        num_games=num_games, native_eval=True)
    trajectory_1 = evaluator.evaluate_human_model_pair(
        num_games=num_games, native_eval=True)

    pair_a = AgentPair(
        GreedyHumanModel(evaluator.env.mlam), GreedyHumanModel(evaluator.env.mlam))
    trajectory_2 = evaluator.evaluate_agent_pair(
        agent_pair=pair_a, num_games=num_games, native_eval=True)

    pair_b = AgentPair(
        GreedyHumanModel(evaluator.env.mlam), GreedyHumanModel(evaluator.env.mlam))
    trajectory_3 = evaluator.evaluate_agent_pair(
        agent_pair=pair_b, num_games=num_games, native_eval=True)
def test_agents_on_open_map(self):
    """A variety of agent pairs all run without error on the open scenario2 map."""
    mdp = OvercookedGridworld.from_layout_name('scenario2')
    mlam = MediumLevelActionManager.from_pickle_or_compute(
        mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
    start_state = OvercookedState(
        [P((8, 1), s), P((1, 1), s)],
        {},
        all_orders=mdp.start_all_orders)

    pairs = [
        AgentPair(GreedyHumanModel(mlam), GreedyHumanModel(mlam)),
        AgentPair(SimpleGreedyHumanModel(mlam), SimpleGreedyHumanModel(mlam)),
        AgentPair(RandomAgent(all_actions=True), RandomAgent(all_actions=True)),
        AgentPair(RandomAgent(all_actions=False), RandomAgent(all_actions=False)),
    ]
    for pair in pairs:
        # Fresh env per pair so each run starts from the same state.
        env = OvercookedEnv.from_mdp(
            mdp, start_state_fn=lambda: start_state, horizon=100)
        env.run_agents(pair, include_final_state=True, display=DISPLAY)
def evaluate_one_optimal_one_greedy_human(self, num_games, h_idx=0, display=True):
    """Evaluate a greedy human against an optimal planner.

    h_idx selects which seat (0 or 1) the human occupies.
    """
    human = GreedyHumanModel(self.mlp)
    robot = CoupledPlanningAgent(self.mlp)
    if h_idx == 0:
        pair = AgentPair(human, robot)
    else:
        pair = AgentPair(robot, human)
    return self.evaluate_agent_pair(pair, num_games=num_games, display=display)
def configure_other_agent(params, gym_env, mlp, mdp):
    """Attach the partner agent specified by params["OTHER_AGENT_TYPE"] to gym_env.

    Supported types:
      "hm"                  -- GreedyHumanModel parameterized by params["HM_PARAMS"]
      "bc_train"/"bc_test"  -- behavior-cloned agent loaded from the saved best paths
      "rnd"                 -- RandomAgent
      "sp"                  -- self-play: no fixed partner, enable randomization

    Raises:
        ValueError: for an unrecognized agent type.
    """
    if params["OTHER_AGENT_TYPE"] == "hm":
        hl_br, hl_temp, ll_br, ll_temp = params["HM_PARAMS"]
        agent = GreedyHumanModel(mlp, hl_boltzmann_rational=hl_br, hl_temp=hl_temp,
                                 ll_boltzmann_rational=ll_br, ll_temp=ll_temp)
        gym_env.use_action_method = True

    elif params["OTHER_AGENT_TYPE"][:2] == "bc":
        best_bc_model_paths = load_pickle(BEST_BC_MODELS_PATH)
        if params["OTHER_AGENT_TYPE"] == "bc_train":
            bc_model_path = best_bc_model_paths["train"][mdp.layout_name]
        elif params["OTHER_AGENT_TYPE"] == "bc_test":
            bc_model_path = best_bc_model_paths["test"][mdp.layout_name]
        else:
            raise ValueError("Other agent type must be bc train or bc test")
        print("LOADING BC MODEL FROM: {}".format(bc_model_path))
        agent, bc_params = get_bc_agent_from_saved(bc_model_path)
        gym_env.use_action_method = True
        # Make sure environment params are the same in PPO as in the BC model.
        # BUGFIX: the failure messages previously formatted params[k], which is a
        # KeyError for nested keys (k lives under "env_params"/"mdp_params", not at
        # the top level); report the actual PPO-side value instead.
        for k, v in bc_params["env_params"].items():
            assert v == params["env_params"][k], \
                "{} did not match. env_params: {} \t PPO params: {}".format(
                    k, v, params["env_params"][k])
        for k, v in bc_params["mdp_params"].items():
            assert v == params["mdp_params"][k], \
                "{} did not match. mdp_params: {} \t PPO params: {}".format(
                    k, v, params["mdp_params"][k])

    elif params["OTHER_AGENT_TYPE"] == "rnd":
        agent = RandomAgent()

    elif params["OTHER_AGENT_TYPE"] == "sp":
        # Pure self-play: no partner agent is constructed.
        gym_env.self_play_randomization = 1

    else:
        raise ValueError("unknown type of agent to match with")

    if not params["OTHER_AGENT_TYPE"] == "sp":
        assert mlp.mdp == mdp
        agent.set_mdp(mdp)
        gym_env.other_agent = agent
def test_one_coupled_one_greedy_human(self):
    # Even though in the first ~10 timesteps it seems like agent 1 is wasting
    # time, this is actually not suboptimal: the true bottleneck turns out to
    # be agent 0 later on (when it goes to get the 3rd onion).
    human = GreedyHumanModel(self.mlp_large)
    robot = CoupledPlanningAgent(self.mlp_large)
    start_state = OvercookedState(
        [P((2, 1), s), P((1, 1), s)],
        {},
        order_list=['onion'])
    env = OvercookedEnv(large_mdp, start_state_fn=lambda: start_state)
    trajectory, time_taken, _, _ = env.run_agents(
        AgentPair(human, robot), include_final_state=True, display=DISPLAY)
    final_state = trajectory[-1][0]
    # The single onion order should be fulfilled by the end of the run.
    self.assertEqual(final_state.order_list, [])
def setUp(self):
    """Build the cramped_room MDP, a freshly computed action manager, an env, and a greedy pair."""
    np.random.seed(0)  # deterministic runs across test invocations
    self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
    # force_compute=True: always recompute rather than loading a cached pickle
    self.mlam = MediumLevelActionManager.from_pickle_or_compute(
        self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True
    )
    self.env = OvercookedEnv.from_mdp(self.base_mdp, **DEFAULT_ENV_PARAMS)
    self.greedy_human_model_pair = AgentPair(
        GreedyHumanModel(self.mlam), GreedyHumanModel(self.mlam)
    )