from collections import defaultdict
import copy
import unittest

import numpy as np

# Project-level names used below (AgentEvaluator, AgentPair, GreedyHumanModel,
# RandomAgent, EmbeddedPlanningAgent, get_bc_agent_from_saved,
# get_bc_agent_from_model, get_ppo_agent, eval_with_benchmarking_from_saved,
# common_keys_equal, mean_and_std_err, DISPLAY) come from the surrounding
# overcooked_ai / human_aware_rl modules; exact import paths depend on the
# repo layout.


def evaluate_ppo_hm_and_bc(layout, ppo_hm_path, bc_test_path, num_rounds,
                           seeds, best=False, display=False):
    """Evaluate a PPO_HM agent paired with a held-out BC test agent on
    `layout`, averaging episode returns over `num_rounds` games per seed,
    for both agent orderings (PPO_HM as player 0 and as player 1)."""
    ppo_hm_performance = defaultdict(lambda: defaultdict(list))

    agent_bc_test, bc_params = get_bc_agent_from_saved(bc_test_path)
    del bc_params["data_params"]
    del bc_params["mdp_fn_params"]
    evaluator = AgentEvaluator(**bc_params)

    for seed in seeds:
        agent_ppo, _ = get_ppo_agent(ppo_hm_path, seed, best=best)

        # PPO_HM as player 0, BC_test as player 1
        ppo_and_bc = evaluator.evaluate_agent_pair(
            AgentPair(agent_ppo, agent_bc_test),
            num_games=num_rounds, display=display)
        avg_ppo_and_bc = np.mean(ppo_and_bc['ep_returns'])
        ppo_hm_performance[layout]["PPO_HM+BC_test_0"].append(avg_ppo_and_bc)

        # BC_test as player 0, PPO_HM as player 1
        bc_and_ppo = evaluator.evaluate_agent_pair(
            AgentPair(agent_bc_test, agent_ppo),
            num_games=num_rounds, display=display)
        avg_bc_and_ppo = np.mean(bc_and_ppo['ep_returns'])
        ppo_hm_performance[layout]["PPO_HM+BC_test_1"].append(avg_bc_and_ppo)

    return ppo_hm_performance
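# Illustrative sketch of calling evaluate_ppo_hm_and_bc. The layout name,
# checkpoint paths, and seed list below are hypothetical assumptions for the
# example; substitute the directories your own training runs produced.
def _example_evaluate_ppo_hm_and_bc():
    performance = evaluate_ppo_hm_and_bc(
        layout="cramped_room",
        ppo_hm_path="path/to/ppo_hm_run",    # assumed checkpoint directory
        bc_test_path="path/to/bc_test_run",  # assumed checkpoint directory
        num_rounds=10,
        seeds=[0, 1, 2],
        best=True,
    )
    # One averaged return per seed, for each of the two agent orderings.
    print(performance["cramped_room"]["PPO_HM+BC_test_0"])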
def evaluate_bc_models(bc_model_paths, num_rounds):
    """Evaluate the BC models passed in over `num_rounds` rounds, both in
    self-play and in cross-play between the train and test models."""
    best_bc_models_performance = {}

    # Evaluate the best BC models for each layout
    for layout_name in bc_model_paths['train'].keys():
        print(layout_name)
        best_bc_models_performance[layout_name] = {}

        # Self-play: BC_train with itself, BC_test with itself
        eval_trajs = eval_with_benchmarking_from_saved(
            num_rounds, bc_model_paths['train'][layout_name])
        best_bc_models_performance[layout_name]["BC_train+BC_train"] = \
            mean_and_std_err(eval_trajs['ep_returns'])

        eval_trajs = eval_with_benchmarking_from_saved(
            num_rounds, bc_model_paths['test'][layout_name])
        best_bc_models_performance[layout_name]["BC_test+BC_test"] = \
            mean_and_std_err(eval_trajs['ep_returns'])

        # Cross-play: BC_train paired with BC_test, in both orderings
        bc_train, bc_params_train = get_bc_agent_from_saved(
            bc_model_paths['train'][layout_name])
        bc_test, bc_params_test = get_bc_agent_from_saved(
            bc_model_paths['test'][layout_name])
        del bc_params_train["data_params"]
        del bc_params_test["data_params"]
        assert common_keys_equal(bc_params_train, bc_params_test)
        ae = AgentEvaluator(mdp_params=bc_params_train["mdp_params"],
                            env_params=bc_params_train["env_params"])

        train_and_test = ae.evaluate_agent_pair(
            AgentPair(bc_train, bc_test), num_games=num_rounds)
        best_bc_models_performance[layout_name]["BC_train+BC_test_0"] = \
            mean_and_std_err(train_and_test['ep_returns'])

        test_and_train = ae.evaluate_agent_pair(
            AgentPair(bc_test, bc_train), num_games=num_rounds)
        best_bc_models_performance[layout_name]["BC_train+BC_test_1"] = \
            mean_and_std_err(test_and_train['ep_returns'])

    return best_bc_models_performance
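# Illustrative sketch of calling evaluate_bc_models. The layout name and the
# model paths below are hypothetical assumptions; point them at the
# directories where your trained BC models were saved.
def _example_evaluate_bc_models():
    bc_model_paths = {
        "train": {"cramped_room": "path/to/bc_train/cramped_room"},  # assumed
        "test": {"cramped_room": "path/to/bc_test/cramped_room"},    # assumed
    }
    performance = evaluate_bc_models(bc_model_paths, num_rounds=10)
    # Each pairing name maps to a (mean return, standard error) tuple.
    print(performance["cramped_room"]["BC_train+BC_test_0"])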
    def test_embedded_planning_agent(self):
        agent_evaluator = AgentEvaluator(
            {"layout_name": "cramped_room"}, {"horizon": 100})
        other_agent = GreedyHumanModel(agent_evaluator.mlp)
        epa = EmbeddedPlanningAgent(
            other_agent, agent_evaluator.mlp, agent_evaluator.env,
            delivery_horizon=1)
        ap = AgentPair(epa, other_agent)
        agent_evaluator.evaluate_agent_pair(ap, num_games=1, display=DISPLAY)
class TestAgentEvaluator(unittest.TestCase):

    def setUp(self):
        self.agent_eval = AgentEvaluator(
            {"layout_name": "cramped_room"}, {"horizon": 100})

    def test_human_model_pair(self):
        trajs = self.agent_eval.evaluate_human_model_pair()
        try:
            AgentEvaluator.check_trajectories(trajs)
        except AssertionError as e:
            self.fail(
                "Trajectories were not returned in standard format:\n{}".format(e))

    def test_rollouts(self):
        ap = AgentPair(RandomAgent(), RandomAgent())
        trajs = self.agent_eval.evaluate_agent_pair(ap, num_games=5)
        try:
            AgentEvaluator.check_trajectories(trajs)
        except AssertionError as e:
            self.fail(
                "Trajectories were not returned in standard format:\n{}".format(e))

    def test_mlp_computation(self):
        try:
            self.agent_eval.mlp
        except Exception as e:
            self.fail("Failed to compute MediumLevelPlanner:\n{}".format(e))
def eval_with_benchmarking_from_model(n_games, model, bc_params, no_waits,
                                      display=False):
    """Benchmark a BC model in self-play for `n_games` games and return the
    resulting trajectories."""
    bc_params = copy.deepcopy(bc_params)
    a0 = get_bc_agent_from_model(model, bc_params, no_waits)
    a1 = get_bc_agent_from_model(model, bc_params, no_waits)
    del bc_params["data_params"], bc_params["mdp_fn_params"]
    a_eval = AgentEvaluator(**bc_params)
    ap = AgentPair(a0, a1)
    trajectories = a_eval.evaluate_agent_pair(
        ap, num_games=n_games, display=display)
    return trajectories
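# Illustrative sketch of calling eval_with_benchmarking_from_model. Passing in
# `model` and `bc_params` loaded elsewhere is an assumption here; any
# model/params pair produced by the repo's BC training code should work.
def _example_eval_with_benchmarking_from_model(model, bc_params):
    trajectories = eval_with_benchmarking_from_model(
        n_games=5, model=model, bc_params=bc_params, no_waits=True)
    print(np.mean(trajectories["ep_returns"]))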
def evaluate_ppo_and_bc_models_for_layout(layout, num_rounds, bc_model_paths,
                                          ppo_bc_model_paths, seeds,
                                          best=False, display=False):
    """Evaluate PPO_BC_train and PPO_BC_test agents paired with a held-out
    BC_test agent on `layout`, in both agent orderings, one run per seed."""
    assert len(seeds["bc_train"]) == len(seeds["bc_test"])
    ppo_bc_performance = defaultdict(lambda: defaultdict(list))

    agent_bc_test, bc_params = get_bc_agent_from_saved(
        bc_model_paths['test'][layout])
    ppo_bc_train_path = ppo_bc_model_paths['bc_train'][layout]
    ppo_bc_test_path = ppo_bc_model_paths['bc_test'][layout]
    evaluator = AgentEvaluator(mdp_params=bc_params["mdp_params"],
                               env_params=bc_params["env_params"])

    for seed_idx in range(len(seeds["bc_train"])):
        agent_ppo_bc_train, ppo_config = get_ppo_agent(
            ppo_bc_train_path, seeds["bc_train"][seed_idx], best=best)
        assert common_keys_equal(bc_params["mdp_params"],
                                 ppo_config["mdp_params"])

        # Out of curiosity: how well does the agent do with itself?
        # ppo_and_ppo = evaluator.evaluate_agent_pair(
        #     AgentPair(agent_ppo_bc_train, agent_ppo_bc_train),
        #     num_games=max(int(num_rounds / 2), 1), display=display)
        # avg_ppo_and_ppo = np.mean(ppo_and_ppo['ep_returns'])
        # ppo_bc_performance[layout]["PPO_BC_train+PPO_BC_train"].append(avg_ppo_and_ppo)

        # How well does it generalize to a new agent in simulation?
        ppo_and_bc = evaluator.evaluate_agent_pair(
            AgentPair(agent_ppo_bc_train, agent_bc_test),
            num_games=num_rounds, display=display)
        avg_ppo_and_bc = np.mean(ppo_and_bc['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_train+BC_test_0"].append(
            avg_ppo_and_bc)

        bc_and_ppo = evaluator.evaluate_agent_pair(
            AgentPair(agent_bc_test, agent_ppo_bc_train),
            num_games=num_rounds, display=display)
        avg_bc_and_ppo = np.mean(bc_and_ppo['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_train+BC_test_1"].append(
            avg_bc_and_ppo)

        # How well could we do if we knew the true model, BC_test?
        agent_ppo_bc_test, ppo_config = get_ppo_agent(
            ppo_bc_test_path, seeds["bc_test"][seed_idx], best=best)
        assert common_keys_equal(bc_params["mdp_params"],
                                 ppo_config["mdp_params"])

        ppo_and_bc = evaluator.evaluate_agent_pair(
            AgentPair(agent_ppo_bc_test, agent_bc_test),
            num_games=num_rounds, display=display)
        avg_ppo_and_bc = np.mean(ppo_and_bc['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_test+BC_test_0"].append(
            avg_ppo_and_bc)

        bc_and_ppo = evaluator.evaluate_agent_pair(
            AgentPair(agent_bc_test, agent_ppo_bc_test),
            num_games=num_rounds, display=display)
        avg_bc_and_ppo = np.mean(bc_and_ppo['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_test+BC_test_1"].append(
            avg_bc_and_ppo)

    return ppo_bc_performance
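# Illustrative sketch of a full PPO_BC-vs-BC evaluation for one layout. All
# paths and seeds below are hypothetical assumptions; the nested dict shapes
# mirror the parameters evaluate_ppo_and_bc_models_for_layout expects.
def _example_evaluate_ppo_and_bc_models_for_layout():
    layout = "cramped_room"
    bc_model_paths = {"test": {layout: "path/to/bc_test_run"}}  # assumed
    ppo_bc_model_paths = {
        "bc_train": {layout: "path/to/ppo_bc_train_run"},       # assumed
        "bc_test": {layout: "path/to/ppo_bc_test_run"},         # assumed
    }
    seeds = {"bc_train": [0, 1], "bc_test": [0, 1]}
    performance = evaluate_ppo_and_bc_models_for_layout(
        layout, num_rounds=10, bc_model_paths=bc_model_paths,
        ppo_bc_model_paths=ppo_bc_model_paths, seeds=seeds, best=True)
    # One averaged return per seed for each pairing and ordering.
    print(performance[layout]["PPO_BC_train+BC_test_0"])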