def evaluate_pbt_for_layout(layout_name, num_rounds, pbt_performance, pbt_model_paths, best_test_bc_models, seeds, best=False):
    """
    Evaluate PBT agents (one per seed) for `layout_name`, pairing each with the best
    BC test agent for that layout and accumulating results in `pbt_performance`.
    """
    bc_agent, bc_params = get_bc_agent_from_saved(
        model_name=best_test_bc_models[layout_name])
    ae = AgentEvaluator(mdp_params=bc_params["mdp_params"],
                        env_params=bc_params["env_params"])

    pbt_save_dir = PBT_DATA_DIR + pbt_model_paths[layout_name] + "/"
    pbt_config = load_dict_from_txt(pbt_save_dir + "config")
    assert common_keys_equal(
        bc_params["mdp_params"], pbt_config["mdp_params"]
    ), "Mdp params differed between PBT and BC models training"
    assert common_keys_equal(
        bc_params["env_params"], pbt_config["env_params"]
    ), "Env params differed between PBT and BC models training"

    pbt_agents = [
        get_pbt_agent_from_config(pbt_save_dir, pbt_config["sim_threads"],
                                  seed=seed, agent_idx=0, best=best)
        for seed in seeds
    ]
    eval_pbt_over_seeds(pbt_agents, bc_agent, layout_name, num_rounds,
                        pbt_performance, ae)
    return pbt_performance
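
# Illustrative driver sketch (not part of the original module): runs
# evaluate_pbt_for_layout over several layouts and accumulates all results in a
# single nested dict, mirroring the defaultdict structure used elsewhere in this
# file. The function name and the layouts argument are assumptions for this example.
def evaluate_all_pbt_models(layouts, pbt_model_paths, best_test_bc_models, num_rounds, seeds, best=False):
    pbt_performance = defaultdict(lambda: defaultdict(list))
    for layout_name in layouts:
        pbt_performance = evaluate_pbt_for_layout(
            layout_name, num_rounds, pbt_performance, pbt_model_paths,
            best_test_bc_models, seeds=seeds, best=best)
    return pbt_performance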
def evaluate_bc_models(bc_model_paths, num_rounds):
    """
    Evaluate BC models passed in over `num_rounds` rounds
    """
    best_bc_models_performance = {}

    # Evaluate best
    for layout_name in bc_model_paths['train'].keys():
        print(layout_name)
        best_bc_models_performance[layout_name] = {}

        eval_trajs = eval_with_benchmarking_from_saved(num_rounds, bc_model_paths['train'][layout_name])
        best_bc_models_performance[layout_name]["BC_train+BC_train"] = mean_and_std_err(eval_trajs['ep_returns'])

        eval_trajs = eval_with_benchmarking_from_saved(num_rounds, bc_model_paths['test'][layout_name])
        best_bc_models_performance[layout_name]["BC_test+BC_test"] = mean_and_std_err(eval_trajs['ep_returns'])

        bc_train, bc_params_train = get_bc_agent_from_saved(bc_model_paths['train'][layout_name])
        bc_test, bc_params_test = get_bc_agent_from_saved(bc_model_paths['test'][layout_name])
        del bc_params_train["data_params"]
        del bc_params_test["data_params"]
        assert common_keys_equal(bc_params_train, bc_params_test)
        ae = AgentEvaluator(mdp_params=bc_params_train["mdp_params"],
                            env_params=bc_params_train["env_params"])

        train_and_test = ae.evaluate_agent_pair(AgentPair(bc_train, bc_test), num_games=num_rounds)
        best_bc_models_performance[layout_name]["BC_train+BC_test_0"] = mean_and_std_err(train_and_test['ep_returns'])

        test_and_train = ae.evaluate_agent_pair(AgentPair(bc_test, bc_train), num_games=num_rounds)
        best_bc_models_performance[layout_name]["BC_train+BC_test_1"] = mean_and_std_err(test_and_train['ep_returns'])

    return best_bc_models_performance
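
# Usage sketch (for illustration only; the model names below are hypothetical):
# `bc_model_paths` maps "train"/"test" to per-layout saved BC model names, and the
# returned dict maps layout -> agent pairing -> mean_and_std_err of episode returns.
#
# best_bc_model_paths = {
#     "train": {"simple": "simple_bc_train_seed0"},
#     "test":  {"simple": "simple_bc_test_seed0"},
# }
# bc_models_performance = evaluate_bc_models(best_bc_model_paths, num_rounds=100)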
def evaluate_ppo_and_bc_models_for_layout(layout, num_rounds, bc_model_paths, ppo_bc_model_paths, seeds, best=False, display=False):
    """
    Evaluate PPO agents trained with a BC_train partner and with a BC_test partner
    (over the given seeds), each paired with the best BC test agent for `layout`.
    """
    assert len(seeds["bc_train"]) == len(seeds["bc_test"])
    ppo_bc_performance = defaultdict(lambda: defaultdict(list))

    agent_bc_test, bc_params = get_bc_agent_from_saved(bc_model_paths['test'][layout])
    ppo_bc_train_path = ppo_bc_model_paths['bc_train'][layout]
    ppo_bc_test_path = ppo_bc_model_paths['bc_test'][layout]
    evaluator = AgentEvaluator(mdp_params=bc_params["mdp_params"],
                               env_params=bc_params["env_params"])

    for seed_idx in range(len(seeds["bc_train"])):
        agent_ppo_bc_train, ppo_config = get_ppo_agent(
            ppo_bc_train_path, seeds["bc_train"][seed_idx], best=best)
        assert common_keys_equal(bc_params["mdp_params"], ppo_config["mdp_params"])

        # For curiosity: how well does the agent do with itself?
        # ppo_and_ppo = evaluator.evaluate_agent_pair(AgentPair(agent_ppo_bc_train, agent_ppo_bc_train), num_games=max(int(num_rounds/2), 1), display=display)
        # avg_ppo_and_ppo = np.mean(ppo_and_ppo['ep_returns'])
        # ppo_bc_performance[layout]["PPO_BC_train+PPO_BC_train"].append(avg_ppo_and_ppo)

        # How well does it generalize to a new agent in simulation?
        ppo_and_bc = evaluator.evaluate_agent_pair(
            AgentPair(agent_ppo_bc_train, agent_bc_test), num_games=num_rounds, display=display)
        avg_ppo_and_bc = np.mean(ppo_and_bc['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_train+BC_test_0"].append(avg_ppo_and_bc)

        bc_and_ppo = evaluator.evaluate_agent_pair(
            AgentPair(agent_bc_test, agent_ppo_bc_train), num_games=num_rounds, display=display)
        avg_bc_and_ppo = np.mean(bc_and_ppo['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_train+BC_test_1"].append(avg_bc_and_ppo)

        # How well could we do if we knew the true model, BC_test?
        agent_ppo_bc_test, ppo_config = get_ppo_agent(
            ppo_bc_test_path, seeds["bc_test"][seed_idx], best=best)
        assert common_keys_equal(bc_params["mdp_params"], ppo_config["mdp_params"])

        ppo_and_bc = evaluator.evaluate_agent_pair(
            AgentPair(agent_ppo_bc_test, agent_bc_test), num_games=num_rounds, display=display)
        avg_ppo_and_bc = np.mean(ppo_and_bc['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_test+BC_test_0"].append(avg_ppo_and_bc)

        bc_and_ppo = evaluator.evaluate_agent_pair(
            AgentPair(agent_bc_test, agent_ppo_bc_test), num_games=num_rounds, display=display)
        avg_bc_and_ppo = np.mean(bc_and_ppo['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_test+BC_test_1"].append(avg_bc_and_ppo)

    return ppo_bc_performance
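
# Illustrative driver sketch (not part of the original module): evaluates the PPO_BC
# agents on several layouts and merges the per-layout results into one dict keyed by
# layout name. The function name and the layouts argument are assumptions for this example.
def evaluate_all_ppo_and_bc_models(layouts, num_rounds, bc_model_paths, ppo_bc_model_paths, seeds, best=False, display=False):
    ppo_bc_performance = {}
    for layout in layouts:
        layout_performance = evaluate_ppo_and_bc_models_for_layout(
            layout, num_rounds, bc_model_paths, ppo_bc_model_paths,
            seeds, best=best, display=display)
        ppo_bc_performance.update(layout_performance)
    return ppo_bc_performance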