def evaluate_bc_models(bc_model_paths, num_rounds):
    """
    Evaluate BC models passed in over `num_rounds` rounds.

    For every layout listed under ``bc_model_paths['train']`` four entries are
    recorded, each being ``mean_and_std_err`` of the episode returns:

    * ``"BC_train+BC_train"``  - result of benchmarking the train model path
    * ``"BC_test+BC_test"``    - result of benchmarking the test model path
    * ``"BC_train+BC_test_0"`` - train agent passed first, test agent second
    * ``"BC_train+BC_test_1"`` - test agent passed first, train agent second

    Args:
        bc_model_paths: mapping with ``'train'`` and ``'test'`` entries, each
            mapping a layout name to a saved BC model identifier.
        num_rounds: number of rounds/games forwarded to the evaluators.

    Returns:
        dict mapping layout name -> dict of the four entries above.
    """
    performance_by_layout = {}

    for layout_name, train_path in bc_model_paths['train'].items():
        print(layout_name)
        layout_scores = {}
        performance_by_layout[layout_name] = layout_scores

        # Each saved model benchmarked on its own.
        train_trajs = eval_with_benchmarking_from_saved(num_rounds, train_path)
        layout_scores["BC_train+BC_train"] = mean_and_std_err(train_trajs['ep_returns'])

        test_path = bc_model_paths['test'][layout_name]
        test_trajs = eval_with_benchmarking_from_saved(num_rounds, test_path)
        layout_scores["BC_test+BC_test"] = mean_and_std_err(test_trajs['ep_returns'])

        # Load both agents so they can be paired with each other.
        bc_train, bc_params_train = get_bc_agent_from_saved(train_path)
        bc_test, bc_params_test = get_bc_agent_from_saved(test_path)

        # "data_params" is dropped from both param dicts before comparing them.
        bc_params_train.pop("data_params")
        bc_params_test.pop("data_params")
        assert common_keys_equal(bc_params_train, bc_params_test)

        evaluator = AgentEvaluator(
            mdp_params=bc_params_train["mdp_params"],
            env_params=bc_params_train["env_params"],
        )

        # Evaluate the mixed pair in both argument orders. The AgentPair is
        # built right before it is evaluated, one ordering at a time.
        orderings = (
            ("BC_train+BC_test_0", (bc_train, bc_test)),
            ("BC_train+BC_test_1", (bc_test, bc_train)),
        )
        for score_key, agents in orderings:
            pair_trajs = evaluator.evaluate_agent_pair(AgentPair(*agents), num_games=num_rounds)
            layout_scores[score_key] = mean_and_std_err(pair_trajs['ep_returns'])

    return performance_by_layout
def evaluate_pbt_for_layout(layout_name, num_rounds, pbt_performance, pbt_model_paths,
                            best_test_bc_models, seeds, best=False):
    """
    Evaluate the PBT agents of one layout (one per seed) against the BC model
    registered for that layout in `best_test_bc_models`.

    Args:
        layout_name: key used to index `pbt_model_paths` and `best_test_bc_models`.
        num_rounds: forwarded to `eval_pbt_over_seeds`.
        pbt_performance: results container forwarded to `eval_pbt_over_seeds`
            and returned afterwards.
        pbt_model_paths: mapping layout name -> PBT run directory name
            (appended to `PBT_DATA_DIR`).
        best_test_bc_models: mapping layout name -> saved BC model name.
        seeds: iterable of seeds; one PBT agent is loaded per seed.
        best: forwarded to `get_pbt_agent_from_config`.

    Returns:
        The `pbt_performance` object that was passed in.

    Raises:
        AssertionError: if the mdp or env params of the BC model and of the
            PBT config disagree according to `common_keys_equal`.
    """
    bc_model_name = best_test_bc_models[layout_name]
    bc_agent, bc_params = get_bc_agent_from_saved(model_name=bc_model_name)
    ae = AgentEvaluator(mdp_params=bc_params["mdp_params"], env_params=bc_params["env_params"])

    pbt_save_dir = PBT_DATA_DIR + pbt_model_paths[layout_name] + "/"
    pbt_config = load_dict_from_txt(pbt_save_dir + "config")

    # Both models must have been trained under matching mdp and env settings.
    consistency_checks = (
        ("mdp_params", "Mdp params differed between PBT and BC models training"),
        ("env_params", "Env params differed between PBT and BC models training"),
    )
    for params_key, failure_message in consistency_checks:
        assert common_keys_equal(bc_params[params_key], pbt_config[params_key]), failure_message

    # Agent index 0 of every seed's run is the one evaluated.
    pbt_agents = []
    for seed in seeds:
        pbt_agents.append(
            get_pbt_agent_from_config(
                pbt_save_dir, pbt_config["sim_threads"], seed=seed, agent_idx=0, best=best
            )
        )

    eval_pbt_over_seeds(pbt_agents, bc_agent, layout_name, num_rounds, pbt_performance, ae)
    return pbt_performance
def evaluate_ppo_hm_and_bc(layout, ppo_hm_path, bc_test_path, num_rounds, seeds,
                           best=False, display=False):
    """
    Evaluate PPO agents (one per seed) paired with a saved BC agent, in both
    argument orders.

    Args:
        layout: key under which results are stored in the returned mapping.
        ppo_hm_path: path forwarded to `get_ppo_agent`.
        bc_test_path: saved BC model forwarded to `get_bc_agent_from_saved`.
        num_rounds: number of games per evaluation.
        seeds: iterable of seeds; one PPO agent is loaded per seed.
        best: forwarded to `get_ppo_agent`.
        display: forwarded to `evaluate_agent_pair`.

    Returns:
        Nested defaultdict ``result[layout][key]`` -> list with one mean
        episode return per seed, where key is ``"PPO_HM+BC_test_0"`` (PPO agent
        passed first) or ``"PPO_HM+BC_test_1"`` (BC agent passed first).
    """
    ppo_hm_performance = defaultdict(lambda: defaultdict(list))

    agent_bc_test, bc_params = get_bc_agent_from_saved(bc_test_path)
    # These two entries are removed before the rest is splatted into
    # AgentEvaluator as keyword arguments.
    for unused_key in ("data_params", "mdp_fn_params"):
        bc_params.pop(unused_key)
    evaluator = AgentEvaluator(**bc_params)

    for seed in seeds:
        agent_ppo, _ = get_ppo_agent(ppo_hm_path, seed, best=best)

        # Same two agents, evaluated once per argument order. The AgentPair is
        # created immediately before its evaluation.
        orderings = (
            ("PPO_HM+BC_test_0", (agent_ppo, agent_bc_test)),
            ("PPO_HM+BC_test_1", (agent_bc_test, agent_ppo)),
        )
        for result_key, agents in orderings:
            trajectories = evaluator.evaluate_agent_pair(
                AgentPair(*agents), num_games=num_rounds, display=display
            )
            mean_return = np.mean(trajectories['ep_returns'])
            ppo_hm_performance[layout][result_key].append(mean_return)

    return ppo_hm_performance
def test_running_ppo_bc_train(self):
    """
    Check that the saved 'simple' BC train model carries the expected params,
    that two PPO runs with the same seed produce identical results, and that
    those results match the stored reference pickle.
    """
    layout_name = 'simple'

    # Check model exists and has right params
    bc_model_path = load_pickle(BEST_BC_MODELS_PATH)["train"][layout_name]
    print("LOADING BC MODEL FROM: {}".format(bc_model_path))
    _, bc_params = get_bc_agent_from_saved(bc_model_path)
    expected_bc_params = {
        'data_params': {
            'train_mdps': ['simple'],
            'ordered_trajs': True,
            'human_ai_trajs': False,
            'data_path': 'data/human/clean_train_trials.pkl',
        },
        'mdp_params': {'layout_name': 'simple', 'start_order_list': None},
        'env_params': {'horizon': 400},
        'mdp_fn_params': {},
    }
    self.assertDictEqual(expected_bc_params, bc_params)

    def run_once():
        # Reset TF state, run the experiment with a fresh config dict and
        # return the first element of the run's result.
        reset_tf()
        finished_run = ex_ppo.run(config_updates={
            'LOCAL_TESTING': True,
            'layout_name': layout_name,
            'OTHER_AGENT_TYPE': 'bc_train',
            'SEEDS': [10],
        })
        return finished_run.result[0]

    # Run twice with same seed and compare output dicts. Did not do as above
    # because additional dependency on the human model
    train_info0 = run_once()
    train_info1 = run_once()
    self.assertDictEqual(train_info0, train_info1)

    # Uncomment to make current output standard output to check against
    # save_pickle(train_info1, 'data/testing/ppo_bc_train_info')

    expected_dict = load_pickle('data/testing/ppo_bc_train_info')
    for info_key, found_values in train_info1.items():
        # zip() stops at the shorter sequence, so only the common prefix of
        # found and expected values is compared.
        for found_item, expected_item in zip(found_values, expected_dict[info_key]):
            self.assertAlmostEqual(found_item, expected_item, places=5)
def evaluate_layout_loss_for_bc_models(best_bc_model_paths, layout_name, trajs, eps):
    """
    Compute losses and accuracies of the train and test BC models of one
    layout on the given trajectories.

    Args:
        best_bc_model_paths: mapping with ``"train"`` and ``"test"`` entries,
            each mapping a layout name to a saved BC model name.
        layout_name: layout whose two models are evaluated.
        trajs: trajectories forwarded to `get_trajs_losses_for_model`.
        eps: forwarded unchanged to `get_trajs_losses_for_model`.

    Returns:
        defaultdict ``result[agent_type]`` with keys ``'losses'`` and
        ``'accuracies'`` for ``agent_type`` in ``"train"``, ``"test"``.
    """
    # TODO Check this isn't stochastic
    layout_losses = defaultdict(dict)

    # Load the train model first, then the test model.
    bc_agents = {
        split: get_bc_agent_from_saved(model_name=best_bc_model_paths[split][layout_name])[0]
        for split in ("train", "test")
    }

    for agent_type, bc_agent in bc_agents.items():
        # Agent flags set before scoring: action_probs on, stochastic and
        # will_unblock_if_stuck off.
        bc_agent.action_probs = True
        bc_agent.stochastic = False
        bc_agent.will_unblock_if_stuck = False

        losses, accuracies = get_trajs_losses_for_model(trajs, bc_agent, eps)
        entry = layout_losses[agent_type]
        entry['losses'] = losses
        entry['accuracies'] = accuracies

    return layout_losses
def setup_game(run_type, run_dir, run_seed, agent_num, player_idx):
    """
    Load an agent of the requested kind and build the environment described
    by that agent's config.

    Args:
        run_type: one of ``"ppo"``, ``"pbt"`` or ``"bc"``.
        run_dir: run directory / saved model name handed to the loader.
        run_seed: seed of the run to load (used for ppo and pbt only).
        agent_num: index of the PBT agent to load (used for pbt only).
        player_idx: returned unchanged as the third tuple element.

    Returns:
        Tuple ``(env, agent, player_idx)``.

    Raises:
        ValueError: if `run_type` is not one of the supported kinds.
    """
    if run_type not in ("ppo", "pbt", "bc"):
        raise ValueError("Unrecognized run type")

    if run_type == "ppo":
        print("Seed", run_seed)
        agent, config = get_ppo_agent(run_dir, run_seed, best=True)
    elif run_type == "pbt":
        seed_suffix = "/seed_{}".format(run_seed)
        run_path = "data/" + run_type + "_runs/" + run_dir + seed_suffix
        config = load_dict_from_file(run_path + "/config.txt")
        agent_path = run_path + '/agent' + str(agent_num) + "/best"
        agent = get_agent_from_saved_model(agent_path, config["sim_threads"])
    else:
        # Only "bc" can reach this branch because of the guard above.
        agent, config = get_bc_agent_from_saved(run_dir)

    mdp = OvercookedGridworld.from_layout_name(**config["mdp_params"])
    env = OvercookedEnv(mdp, **config["env_params"])
    return env, agent, player_idx
def configure_other_agent(params, gym_env, mlp, mdp):
    """
    Configure `gym_env` with the partner agent selected by
    ``params["OTHER_AGENT_TYPE"]``.

    Supported types:

    * ``"hm"``          - `GreedyHumanModel` built from ``params["HM_PARAMS"]``.
    * ``"bc_train"`` / ``"bc_test"`` - saved BC agent for ``mdp.layout_name``,
      looked up in the pickle at `BEST_BC_MODELS_PATH`. The BC model's env and
      mdp params must match the ones in `params`.
    * ``"rnd"``         - `RandomAgent`.
    * ``"sp"``          - no partner agent is attached; only
      ``gym_env.self_play_randomization`` is set to 1.
    * ``"sampling_sp"`` - placeholder `RandomAgent`, with
      ``gym_env.self_play_randomization`` set to 0.0.

    For every type except ``"sp"`` the agent gets ``set_mdp(mdp)`` called on
    it and is stored in ``gym_env.other_agent``.

    Args:
        params: run parameters; must contain ``"OTHER_AGENT_TYPE"`` and, where
            the chosen type needs them, ``"HM_PARAMS"``, ``"env_params"`` and
            ``"mdp_params"``.
        gym_env: environment object that is mutated in place.
        mlp: planner passed to the human model; its ``mdp`` must equal `mdp`
            for every type other than ``"sp"``.
        mdp: the MDP handed to the agent.

    Raises:
        ValueError: unknown agent type, or a ``"bc..."`` type that is neither
            ``"bc_train"`` nor ``"bc_test"``.
        AssertionError: BC/PPO parameter mismatch, or ``mlp.mdp != mdp``.
    """
    other_agent_type = params["OTHER_AGENT_TYPE"]

    if other_agent_type == "hm":
        hl_br, hl_temp, ll_br, ll_temp = params["HM_PARAMS"]
        agent = GreedyHumanModel(mlp, hl_boltzmann_rational=hl_br, hl_temp=hl_temp,
                                 ll_boltzmann_rational=ll_br, ll_temp=ll_temp)
        gym_env.use_action_method = True

    elif other_agent_type[:2] == "bc":
        best_bc_model_paths = load_pickle(BEST_BC_MODELS_PATH)

        if other_agent_type == "bc_train":
            bc_model_path = best_bc_model_paths["train"][mdp.layout_name]
        elif other_agent_type == "bc_test":
            bc_model_path = best_bc_model_paths["test"][mdp.layout_name]
        else:
            raise ValueError("Other agent type must be bc train or bc test")

        print("LOADING BC MODEL FROM: {}".format(bc_model_path))
        agent, bc_params = get_bc_agent_from_saved(bc_model_path)
        gym_env.use_action_method = True

        # Make sure environment params are the same in PPO as in the BC model.
        # The failure message reports the nested PPO value that was actually
        # compared. Indexing the top-level `params` with a nested key (as was
        # done before) raises KeyError while building the message and hides
        # the real mismatch.
        for section in ("env_params", "mdp_params"):
            for k, v in bc_params[section].items():
                ppo_value = params[section][k]
                assert v == ppo_value, "{} did not match. {}: {} \t PPO params: {}".format(
                    k, section, v, ppo_value)

    elif other_agent_type == "rnd":
        agent = RandomAgent()

    elif other_agent_type == "sp":
        gym_env.self_play_randomization = 1

    elif other_agent_type == "sampling_sp":
        agent = RandomAgent()  # just a place holder, will be replace in training loop
        # NOTE(review): this line used to be annotated "sp with itself 30% of
        # the time, the rest is sampling_sp", which does not match the 0.0
        # assigned here - confirm the intended value.
        gym_env.self_play_randomization = 0.0
        # gym_env.use_action_method = True

    else:
        raise ValueError("unknown type of agent to match with")

    # Pure self-play has no separate partner agent to attach.
    if not other_agent_type == "sp":
        assert mlp.mdp == mdp
        agent.set_mdp(mdp)
        gym_env.other_agent = agent
def evaluate_ppo_and_bc_models_for_layout(layout, num_rounds, bc_model_paths, ppo_bc_model_paths,
                                          seeds, best=False, display=False):
    """
    Evaluate PPO agents trained with BC_train and with BC_test, each paired
    with the saved BC_test agent of `layout`, in both argument orders.

    Args:
        layout: layout name used to index the path mappings and the result.
        num_rounds: number of games per evaluation.
        bc_model_paths: mapping whose ``'test'`` entry maps layout -> BC model.
        ppo_bc_model_paths: mapping with ``'bc_train'`` and ``'bc_test'``
            entries, each mapping layout -> PPO run path.
        seeds: mapping with equally long ``"bc_train"`` and ``"bc_test"`` seed
            sequences; entry i of each is evaluated in the same iteration.
        best: forwarded to `get_ppo_agent`.
        display: forwarded to `evaluate_agent_pair`.

    Returns:
        Nested defaultdict ``result[layout][key]`` -> list of mean episode
        returns (one per seed) for the keys ``"PPO_BC_train+BC_test_0"``,
        ``"PPO_BC_train+BC_test_1"``, ``"PPO_BC_test+BC_test_0"`` and
        ``"PPO_BC_test+BC_test_1"``. The ``_0`` keys have the PPO agent passed
        first, the ``_1`` keys have the BC agent passed first.

    Raises:
        AssertionError: if the two seed sequences differ in length, or a PPO
            config's mdp params disagree with the BC model's.
    """
    assert len(seeds["bc_train"]) == len(seeds["bc_test"])
    ppo_bc_performance = defaultdict(lambda: defaultdict(list))

    agent_bc_test, bc_params = get_bc_agent_from_saved(bc_model_paths['test'][layout])
    ppo_bc_train_path = ppo_bc_model_paths['bc_train'][layout]
    ppo_bc_test_path = ppo_bc_model_paths['bc_test'][layout]
    evaluator = AgentEvaluator(mdp_params=bc_params["mdp_params"],
                               env_params=bc_params["env_params"])

    # Disabled experiment kept for reference (PPO_BC_train agent with itself):
    # ppo_and_ppo = evaluator.evaluate_agent_pair(AgentPair(agent_ppo_bc_train, agent_ppo_bc_train), num_games=max(int(num_rounds/2), 1), display=display)
    # avg_ppo_and_ppo = np.mean(ppo_and_ppo['ep_returns'])
    # ppo_bc_performance[layout]["PPO_BC_train+PPO_BC_train"].append(avg_ppo_and_ppo)

    for train_seed, test_seed in zip(seeds["bc_train"], seeds["bc_test"]):
        variants = (
            # How well it generalizes to new agent in simulation?
            (ppo_bc_train_path, train_seed, "PPO_BC_train+BC_test_0", "PPO_BC_train+BC_test_1"),
            # How well could we do if we knew true model BC_test?
            (ppo_bc_test_path, test_seed, "PPO_BC_test+BC_test_0", "PPO_BC_test+BC_test_1"),
        )
        for ppo_path, seed, ppo_first_key, bc_first_key in variants:
            agent_ppo, ppo_config = get_ppo_agent(ppo_path, seed, best=best)
            assert common_keys_equal(bc_params["mdp_params"], ppo_config["mdp_params"])

            # Each AgentPair is built right before its evaluation.
            orderings = (
                (ppo_first_key, (agent_ppo, agent_bc_test)),
                (bc_first_key, (agent_bc_test, agent_ppo)),
            )
            for result_key, agents in orderings:
                trajectories = evaluator.evaluate_agent_pair(
                    AgentPair(*agents), num_games=num_rounds, display=display
                )
                ppo_bc_performance[layout][result_key].append(np.mean(trajectories['ep_returns']))

    return ppo_bc_performance
def P_BC_evaluation_for_layout(ae, layout, best_bc_models):
    """
    Evaluate planning agents that embed a BC model (P_BC) when paired with
    the BC_test agent of `layout`.

    Two planners are evaluated, one embedding the BC_test model and one
    embedding the BC_train model. Each is played for a single displayed game
    per argument order against a separately loaded BC_test agent.

    Args:
        ae: evaluator providing ``env`` and ``evaluate_agent_pair``.
        layout: layout name used to index `best_bc_models` and to obtain the
            delivery horizon.
        best_bc_models: mapping with ``"train"`` and ``"test"`` entries, each
            mapping layout -> saved BC model name.

    Returns:
        dict with the first episode return of each game under the keys
        ``'P_BC_test+BC_test_0'``, ``'P_BC_test+BC_test_1'``,
        ``'P_BC_train+BC_test_0'`` and ``'P_BC_train+BC_test_1'``. The ``_0``
        keys have the planner passed first, the ``_1`` keys the BC agent.
    """
    delivery_horizon = get_delivery_horizon(layout)
    print("Delivery horizon for layout {}: {}".format(layout, delivery_horizon))

    layout_p_bc_eval = {}

    def build_planner(model_name):
        # Load a dedicated copy of the BC model to embed inside the planner.
        embedded_agent, _ = get_bc_agent_from_saved(model_name)
        embedded_agent.stochastic = False
        planner = EmbeddedPlanningAgent(embedded_agent, embedded_agent.mlp, delivery_horizon)
        planner.env = ae.env
        planner.debug = True
        return planner

    def play_both_orders(planner, partner, planner_first_key, partner_first_key, banner):
        # One displayed game with the planner first, then one with it second.
        first = ae.evaluate_agent_pair(AgentPair(planner, partner), num_games=1, display=True)
        planner_first_return = first['ep_returns'][0]
        layout_p_bc_eval[planner_first_key] = planner_first_return

        second = ae.evaluate_agent_pair(AgentPair(partner, planner), num_games=1, display=True)
        partner_first_return = second['ep_returns'][0]
        layout_p_bc_eval[partner_first_key] = partner_first_return

        print(banner, planner_first_return, partner_first_return)

    #######################
    # P_BC_test + BC_test #
    #######################
    test_model_name = best_bc_models["test"][layout]
    agent_bc_test, _ = get_bc_agent_from_saved(test_model_name)
    agent_bc_test.stochastic = False

    # A second copy of BC_test is loaded just to be embedded in P_BC.
    p_bc_test = build_planner(test_model_name)
    play_both_orders(p_bc_test, agent_bc_test,
                     'P_BC_test+BC_test_0', 'P_BC_test+BC_test_1',
                     "P_BC_test + BC_test")

    ########################
    # P_BC_train + BC_test #
    ########################
    train_model_name = best_bc_models["train"][layout]
    p_bc_train = build_planner(train_model_name)
    play_both_orders(p_bc_train, agent_bc_test,
                     'P_BC_train+BC_test_0', 'P_BC_train+BC_test_1',
                     "P_BC_train + BC_test")

    return layout_p_bc_eval