def evaluate_ppo_hm_and_bc(layout, ppo_hm_path, bc_test_path, num_rounds, seeds, best=False, display=False):
    """Evaluate PPO_HM agents against a saved BC test agent on one layout.

    For each seed, loads the PPO_HM agent and plays it with the BC test
    agent in both player orderings, recording the mean episode return of
    each ordering under keys "PPO_HM+BC_test_0" / "PPO_HM+BC_test_1".

    Args:
        layout: layout name used as the top-level key of the result dict.
        ppo_hm_path: run directory of the saved PPO_HM agents.
        bc_test_path: path of the saved BC test model.
        num_rounds: number of games per agent pairing.
        seeds: iterable of PPO training seeds to evaluate.
        best: if True, load the best checkpoint for each seed.
        display: if True, render the evaluation games.

    Returns:
        Nested dict: layout -> pairing key -> list of mean returns (one per seed).
    """
    results = defaultdict(lambda: defaultdict(list))
    bc_agent, bc_params = get_bc_agent_from_saved(bc_test_path)
    # AgentEvaluator does not accept these BC-specific config entries.
    del bc_params["data_params"]
    del bc_params["mdp_fn_params"]
    evaluator = AgentEvaluator(**bc_params)
    for seed in seeds:
        ppo_agent, _ = get_ppo_agent(ppo_hm_path, seed, best=best)
        # Index 0: PPO as player 0; index 1: PPO as player 1.
        for idx, (player0, player1) in enumerate([(ppo_agent, bc_agent), (bc_agent, ppo_agent)]):
            rollout = evaluator.evaluate_agent_pair(
                AgentPair(player0, player1), num_games=num_rounds, display=display)
            mean_return = np.mean(rollout['ep_returns'])
            results[layout]["PPO_HM+BC_test_{}".format(idx)].append(mean_return)
    return results
def evaluate_layout_loss_for_ppo_models(ppo_path, layout_name, trajs, eps, seeds):
    """Compute trajectory prediction losses/accuracies for saved PPO agents.

    For each seed, resets TensorFlow state, loads the best PPO checkpoint,
    enables action-probability output, binds the agent to the layout's
    gridworld, and scores it on the given trajectories.

    Args:
        ppo_path: run directory of the saved PPO agents.
        layout_name: layout name used in the per-seed result keys.
        trajs: trajectories to evaluate the model on.
        eps: epsilon passed through to the loss computation.
        seeds: iterable of PPO training seeds to evaluate.

    Returns:
        Dict keyed by "<layout_name>_seed<seed>" with 'losses' and
        'accuracies' entries per seed.
    """
    layout_losses = defaultdict(dict)
    for seed in seeds:
        # Fresh TF graph/session per seed so models don't collide.
        reset_tf()
        agent, bc_params = get_ppo_agent(ppo_path, seed, best=True)
        agent.action_probs = True
        agent.set_mdp(OvercookedGridworld.from_layout_name(**bc_params["mdp_params"]))
        losses, accuracies = get_trajs_losses_for_model(trajs, agent, eps)
        key = "{}_seed{}".format(layout_name, seed)
        layout_losses[key]['losses'] = losses
        layout_losses[key]['accuracies'] = accuracies
    return layout_losses
def setup_game(run_type, run_dir, run_seed, agent_num, player_idx):
    """Load an agent of the requested kind and build its Overcooked env.

    Args:
        run_type: one of "ppo", "pbt", or "bc" — selects how the agent is loaded.
        run_dir: run directory ("ppo"/"pbt") or saved-model path ("bc").
        run_seed: training seed to load (used by "ppo" and "pbt").
        agent_num: which agent of a PBT population to load ("pbt" only).
        player_idx: player index, returned unchanged for the caller.

    Returns:
        Tuple of (env, agent, player_idx).

    Raises:
        ValueError: if run_type is not one of the recognized kinds.
    """
    if run_type == "ppo":
        print("Seed", run_seed)
        agent, config = get_ppo_agent(run_dir, run_seed, best=True)
    elif run_type == "pbt":
        seed_path = "data/{}_runs/{}/seed_{}".format(run_type, run_dir, run_seed)
        config = load_dict_from_file(seed_path + "/config.txt")
        best_agent_path = "{}/agent{}/best".format(seed_path, agent_num)
        agent = get_agent_from_saved_model(best_agent_path, config["sim_threads"])
    elif run_type == "bc":
        agent, config = get_bc_agent_from_saved(run_dir)
    else:
        raise ValueError("Unrecognized run type")
    gridworld = OvercookedGridworld.from_layout_name(**config["mdp_params"])
    env = OvercookedEnv(gridworld, **config["env_params"])
    return env, agent, player_idx
def _eval_pair_both_orders(evaluator, ppo_agent, bc_agent, num_rounds, display, performance, layout, key_prefix):
    """Evaluate a PPO/BC pairing in both player orderings and record mean returns.

    Appends the mean episode return under "<key_prefix>_0" for
    (ppo_agent, bc_agent) and "<key_prefix>_1" for (bc_agent, ppo_agent)
    into performance[layout].
    """
    for idx, (player0, player1) in enumerate([(ppo_agent, bc_agent), (bc_agent, ppo_agent)]):
        rollout = evaluator.evaluate_agent_pair(
            AgentPair(player0, player1), num_games=num_rounds, display=display)
        key = key_prefix + "_{}".format(idx)
        performance[layout][key].append(np.mean(rollout['ep_returns']))


def evaluate_ppo_and_bc_models_for_layout(layout, num_rounds, bc_model_paths, ppo_bc_model_paths, seeds, best=False, display=False):
    """Evaluate PPO_BC_train and PPO_BC_test agents paired with BC_test on a layout.

    For each seed pair, measures:
      * generalization — PPO trained against BC_train, playing with the
        held-out BC_test agent ("PPO_BC_train+BC_test_{0,1}");
      * an upper bound — PPO trained directly against BC_test, playing
        with BC_test ("PPO_BC_test+BC_test_{0,1}").
    Each pairing is evaluated in both player orderings.

    Args:
        layout: layout name; top-level key of the result dict.
        num_rounds: number of games per agent pairing.
        bc_model_paths: dict with a 'test' entry mapping layout -> BC model path.
        ppo_bc_model_paths: dict with 'bc_train'/'bc_test' entries mapping
            layout -> PPO run directory.
        seeds: dict with equal-length 'bc_train' and 'bc_test' seed lists.
        best: if True, load the best checkpoint for each seed.
        display: if True, render the evaluation games.

    Returns:
        Nested dict: layout -> pairing key -> list of mean returns (one per seed).
    """
    assert len(seeds["bc_train"]) == len(seeds["bc_test"])
    ppo_bc_performance = defaultdict(lambda: defaultdict(list))
    agent_bc_test, bc_params = get_bc_agent_from_saved(bc_model_paths['test'][layout])
    ppo_bc_train_path = ppo_bc_model_paths['bc_train'][layout]
    ppo_bc_test_path = ppo_bc_model_paths['bc_test'][layout]
    evaluator = AgentEvaluator(mdp_params=bc_params["mdp_params"],
                               env_params=bc_params["env_params"])
    for seed_idx in range(len(seeds["bc_train"])):
        # How well does PPO_BC_train generalize to the unseen BC_test partner?
        agent_ppo_bc_train, ppo_config = get_ppo_agent(
            ppo_bc_train_path, seeds["bc_train"][seed_idx], best=best)
        # Guard against evaluating agents trained on a different MDP.
        assert common_keys_equal(bc_params["mdp_params"], ppo_config["mdp_params"])
        _eval_pair_both_orders(evaluator, agent_ppo_bc_train, agent_bc_test,
                               num_rounds, display, ppo_bc_performance,
                               layout, "PPO_BC_train+BC_test")
        # How well could we do if we knew the true model BC_test at training time?
        agent_ppo_bc_test, ppo_config = get_ppo_agent(
            ppo_bc_test_path, seeds["bc_test"][seed_idx], best=best)
        assert common_keys_equal(bc_params["mdp_params"], ppo_config["mdp_params"])
        _eval_pair_both_orders(evaluator, agent_ppo_bc_test, agent_bc_test,
                               num_rounds, display, ppo_bc_performance,
                               layout, "PPO_BC_test+BC_test")
    return ppo_bc_performance