def test_running_ppo_bc_train(self):
    """Check the saved BC model's params, then check PPO-with-BC training output.

    Three things are verified:
      1. The BC model registered under ``["train"]["simple"]`` loads and its
         params match the expected dictionary.
      2. Two PPO runs with the same seed produce identical training info.
      3. The training info matches the pickled reference values to 5 places.
    """
    layout_name = 'simple'

    # Check model exists and has right params
    bc_model_path = load_pickle(BEST_BC_MODELS_PATH)["train"][layout_name]
    print("LOADING BC MODEL FROM: {}".format(bc_model_path))
    _, bc_params = get_bc_agent_from_saved(bc_model_path)
    expected_bc_params = {
        'data_params': {
            'train_mdps': ['simple'],
            'ordered_trajs': True,
            'human_ai_trajs': False,
            'data_path': 'data/human/clean_train_trials.pkl',
        },
        'mdp_params': {'layout_name': 'simple', 'start_order_list': None},
        'env_params': {'horizon': 400},
        'mdp_fn_params': {},
    }
    self.assertDictEqual(expected_bc_params, bc_params)

    def train_once():
        # A fresh config dict is built per run so the two runs share no state.
        reset_tf()
        run = ex_ppo.run(config_updates={
            'LOCAL_TESTING': True,
            'layout_name': layout_name,
            'OTHER_AGENT_TYPE': 'bc_train',
            'SEEDS': [10],
        })
        return run.result[0]

    # Run twice with the same seed and compare the output dicts (the original
    # author notes this approach was chosen because of the additional
    # dependency on the human model).
    train_info0 = train_once()
    train_info1 = train_once()
    self.assertDictEqual(train_info0, train_info1)

    # Uncomment to make current output standard output to check against
    # save_pickle(train_info1, 'data/testing/ppo_bc_train_info')

    expected_dict = load_pickle('data/testing/ppo_bc_train_info')
    for key, found_values in train_info1.items():
        expected_values = expected_dict[key]
        for found_item, expected_item in zip(found_values, expected_values):
            self.assertAlmostEqual(found_item, expected_item, places=5)
def get_ppo_agent(save_dir, seed=0, best=False):
    """Load a saved PPO agent and its config for one seed of a run.

    The seed directory is ``PPO_DATA_DIR + save_dir + '/seed<seed>'``. The
    config is read from ``<seed dir>/config``; the agent is loaded from
    ``<seed dir>/best`` when ``best`` is truthy, otherwise from
    ``<seed dir>/ppo_agent``.

    Returns:
        A ``(agent, config)`` tuple.
    """
    seed_dir = PPO_DATA_DIR + save_dir + '/seed{}'.format(seed)
    config = load_pickle(seed_dir + '/config')
    model_subdir = "/best" if best else "/ppo_agent"
    agent = get_agent_from_saved_model(seed_dir + model_subdir, config["sim_threads"])
    return agent, config
def load_training_data(run_name, seeds=None):
    """Load the per-seed training info and the config of a PPO run.

    Args:
        run_name: Name of the run directory under ``PPO_DATA_DIR``.
        seeds: Seeds whose ``training_info`` should be loaded. When ``None``
            the seeds are derived from the run's config (see below).

    Returns:
        A ``(train_infos, config)`` tuple, where ``train_infos`` holds one
        loaded object per seed, in seed order.
    """
    run_dir = PPO_DATA_DIR + run_name + "/"
    config = load_pickle(run_dir + "config")

    if seeds is None:
        # Backwards compatibility: configs carrying "NUM_SEEDS" get seeds
        # 0..N-1 (capped at 5); otherwise the explicit "SEEDS" list is used.
        if "NUM_SEEDS" in config:
            seeds = list(range(min(config["NUM_SEEDS"], 5)))
        else:
            seeds = config["SEEDS"]

    train_infos = [
        load_pickle(run_dir + "seed{}/training_info".format(seed))
        for seed in seeds
    ]
    return train_infos, config
def load_bc_model_from_path(model_name):
    """Load a saved BC model and the params it was trained with.

    Reads ``bc_metadata`` and ``model`` from ``BC_SAVE_DIR + model_name``.

    NOTE: per the original author, the lowest-loss and highest-accuracy
    models were also saved in the same directory with special suffixes.

    Returns:
        A ``(model, bc_params)`` tuple.
    """
    model_dir = BC_SAVE_DIR + model_name
    bc_params = load_pickle(model_dir + "/bc_metadata")["bc_params"]
    model = GAIL.load(model_dir + "/model")
    return model, bc_params
def test_lossless_state_featurization(self):
    """Compare lossless encodings of 5 rollout games against the stored reference."""
    trajs = self.env.get_rollouts(self.rnd_agent_pair, num_games=5)

    # One list of encodings per episode, preserving episode and state order.
    featurized_observations = []
    for ep_states in trajs["ep_observations"]:
        episode_encodings = []
        for state in ep_states:
            episode_encodings.append(self.base_mdp.lossless_state_encoding(state))
        featurized_observations.append(episode_encodings)

    expected_featurization = load_pickle("data/testing/lossless_state_featurization")
    matches = np.array_equal(expected_featurization, featurized_observations)
    self.assertTrue(matches)
def test_state_featurization(self):
    """Compare ``featurize_state`` output on 5 rollout games against the stored reference."""
    trajs = self.env.get_rollouts(self.greedy_human_model_pair, num_games=5)

    # One list of featurizations per episode, preserving episode and state order.
    featurized_observations = []
    for ep_states in trajs["ep_states"]:
        episode_features = []
        for state in ep_states:
            episode_features.append(self.base_mdp.featurize_state(state, self.mlam))
        featurized_observations.append(episode_features)

    pickle_path = os.path.join(TESTING_DATA_DIR, "test_state_featurization", 'expected')

    # NOTE: If the featurizations are updated intentionally, you can overwrite the expected
    # featurizations by uncommenting the following line:
    # save_pickle(featurized_observations, pickle_path)

    expected_featurization = load_pickle(pickle_path)
    matches = np.array_equal(expected_featurization, featurized_observations)
    self.assertTrue(matches)
def test_lossless_state_featurization(self):
    """Compare lossless encodings of 5 rollout games against the stored reference."""
    trajs = self.env.get_rollouts(self.rnd_agent_pair, num_games=5)

    # One list of encodings per episode, preserving episode and state order.
    featurized_observations = []
    for ep_states in trajs["ep_states"]:
        episode_encodings = []
        for state in ep_states:
            episode_encodings.append(self.base_mdp.lossless_state_encoding(state))
        featurized_observations.append(episode_encodings)

    # NOTE: If the featurizations are updated intentionally, you can overwrite the expected
    # featurizations by uncommenting the following line:
    # save_pickle(featurized_observations, "data/testing/lossless_state_featurization")

    expected_featurization = load_pickle("data/testing/lossless_state_featurization")
    matches = np.array_equal(expected_featurization, featurized_observations)
    self.assertTrue(matches)
def test_serialization(self):
    """Pickle every recipe, load it back, and check it equals the original."""

    def round_trip(index, recipe):
        # Each recipe gets its own file inside the temporary pickle directory.
        pickle_path = os.path.join(self.pickle_temp_dir, 'recipe_{}'.format(index))
        save_pickle(recipe, pickle_path)
        return load_pickle(pickle_path)

    # Save and then load every recipe instance
    loaded_recipes = [
        round_trip(index, recipe) for index, recipe in enumerate(self.recipes)
    ]

    # Ensure loaded recipes equal corresponding original recipe
    for original, loaded in zip(self.recipes, loaded_recipes):
        self.assertEqual(original, loaded)
def load_baselines_model(save_dir, agent_name, config):
    """Rebuild a baselines model from ``save_dir`` and attach its dummy env.

    The pickled ``dummy_env`` is loaded first, then ``learn`` is called with
    ``total_timesteps=0`` and ``load_path=save_dir + "/model"`` (presumably so
    that weights are restored without further training -- confirm against
    ``learn``). ``config`` is forwarded as ``network_kwargs`` and
    ``agent_name`` as the variable scope.

    NOTE: Before using load it might be necessary to clear the tensorflow
    graph if there are already other variables defined.

    Returns:
        The model, with ``model.dummy_env`` set to the loaded environment.
    """
    dummy_env = load_pickle(save_dir + "/dummy_env")
    learn_kwargs = dict(
        network='conv_and_mlp',
        env=dummy_env,
        total_timesteps=0,
        load_path=save_dir + "/model",
        scope=agent_name,
        network_kwargs=config,
    )
    model, _ = learn(**learn_kwargs)
    model.dummy_env = dummy_env
    return model
def test_running_ppo_sp(self):
    """Run self-play PPO locally and compare its training info with the stored reference."""
    reset_tf()
    config_updates = {
        'LOCAL_TESTING': True,
        'layout_name': 'simple',
        'OTHER_AGENT_TYPE': 'sp',
    }
    run = ex_ppo.run(config_updates=config_updates)

    # Just making sure seeding is working correctly and not changing actual outputs
    train_info = run.result[0]

    # Uncomment to make current output standard output to check against
    # save_pickle(train_info, 'data/testing/ppo_sp_train_info')

    expected_sp_dict = load_pickle('data/testing/ppo_sp_train_info')
    for key, found_values in train_info.items():
        expected_values = expected_sp_dict[key]
        for found_item, expected_item in zip(found_values, expected_values):
            self.assertAlmostEqual(found_item, expected_item, places=5)
def configure_other_agent(params, gym_env, mlp, mdp):
    """Configure ``gym_env`` with the partner agent selected by ``params["OTHER_AGENT_TYPE"]``.

    Supported types:
      * ``"hm"``: a ``GreedyHumanModel`` built from ``params["HM_PARAMS"]``.
      * ``"bc_train"`` / ``"bc_test"``: the best saved BC model for
        ``mdp.layout_name``; its env/mdp params must match the PPO params.
      * ``"rnd"``: a ``RandomAgent``.
      * ``"sp"``: self-play; only ``gym_env.self_play_randomization`` is set
        and no partner agent is attached.
      * ``"sampling_sp"``: a placeholder ``RandomAgent`` that the original
        author notes is replaced in the training loop.

    For every type except ``"sp"`` the agent is given ``mdp`` and stored on
    ``gym_env.other_agent``.

    Raises:
        ValueError: for an unknown agent type or an unknown ``bc_*`` variant.
        AssertionError: if the BC model's env/mdp params differ from the PPO
            params, or if ``mlp.mdp != mdp``.
    """
    other_agent_type = params["OTHER_AGENT_TYPE"]

    if other_agent_type == "hm":
        hl_br, hl_temp, ll_br, ll_temp = params["HM_PARAMS"]
        agent = GreedyHumanModel(mlp, hl_boltzmann_rational=hl_br, hl_temp=hl_temp,
                                 ll_boltzmann_rational=ll_br, ll_temp=ll_temp)
        gym_env.use_action_method = True

    elif other_agent_type[:2] == "bc":
        # Validate the variant before touching disk so a bad type fails fast.
        if other_agent_type == "bc_train":
            split = "train"
        elif other_agent_type == "bc_test":
            split = "test"
        else:
            raise ValueError("Other agent type must be bc train or bc test")

        best_bc_model_paths = load_pickle(BEST_BC_MODELS_PATH)
        bc_model_path = best_bc_model_paths[split][mdp.layout_name]

        print("LOADING BC MODEL FROM: {}".format(bc_model_path))
        agent, bc_params = get_bc_agent_from_saved(bc_model_path)
        gym_env.use_action_method = True

        # Make sure environment params are the same in PPO as in the BC model.
        # The message must read the nested PPO value: indexing params[k]
        # directly would generally raise KeyError and hide the real mismatch.
        for k, v in bc_params["env_params"].items():
            assert v == params["env_params"][k], \
                "{} did not match. env_params: {} \t PPO params: {}".format(k, v, params["env_params"][k])
        for k, v in bc_params["mdp_params"].items():
            assert v == params["mdp_params"][k], \
                "{} did not match. mdp_params: {} \t PPO params: {}".format(k, v, params["mdp_params"][k])

    elif other_agent_type == "rnd":
        agent = RandomAgent()

    elif other_agent_type == "sp":
        gym_env.self_play_randomization = 1

    elif other_agent_type == "sampling_sp":
        agent = RandomAgent()  # just a placeholder, will be replaced in training loop
        # NOTE(review): the original comment said "sp with itself 30% of the
        # time, the rest is sampling_sp", but the value set here is 0.0 --
        # confirm which is intended.
        gym_env.self_play_randomization = 0.0
        # gym_env.use_action_method = True

    else:
        raise ValueError("unknown type of agent to match with")

    # Pure self-play defines no partner agent, so nothing is attached.
    if not other_agent_type == "sp":
        assert mlp.mdp == mdp
        agent.set_mdp(mdp)
        gym_env.other_agent = agent
def eval_with_standard_baselines(n_games, model_name, display=False):
    """Evaluate a saved BC model in self-play using stable-baselines infrastructure.

    Intended as a sanity check that everything is compatible and integrating
    correctly. The same model picks the action for both players from their
    respective observations.

    Args:
        n_games: Number of games to play.
        model_name: Directory name of the model under ``BC_SAVE_DIR``.
        display: Accepted but not used by this function.

    Returns:
        The total reward accumulated over all games divided by ``n_games``.
    """
    model_dir = BC_SAVE_DIR + model_name
    bc_params = load_pickle(model_dir + "/bc_metadata")["bc_params"]
    model = GAIL.load(model_dir + "/model")
    gym_env = init_gym_env(bc_params)

    tot_rew = 0
    for _game in tqdm.trange(n_games):
        obs, _ = gym_env.reset()
        # Every game takes at least one step; stop once the env reports done.
        while True:
            ob0, ob1 = obs
            joint_action = (
                stable_baselines_predict_fn(model, ob0),
                stable_baselines_predict_fn(model, ob1),
            )
            (obs, _), rewards, done, _info = gym_env.step(joint_action)
            tot_rew += rewards
            if done:
                break

    avg_reward = tot_rew / n_games
    print("avg reward", avg_reward)
    return avg_reward
def load_trajectories(filename):
    """Load pickled trajectories from ``filename`` and run them through the evaluator's check.

    Returns:
        The loaded trajectories, after ``AgentEvaluator.check_trajectories``
        has been called on them.
    """
    trajectories = load_pickle(filename)
    AgentEvaluator.check_trajectories(trajectories)
    return trajectories
def load_ppo_agent(save_dir, config_dir):
    """Load a PPO agent from ``save_dir`` using the config stored in ``config_dir``.

    Both paths are used as given; no ``PPO_DATA_DIR`` or seed prefix is added.

    Returns:
        A ``(agent, config)`` tuple.
    """
    config = load_pickle(config_dir + '/config')
    sim_threads = config["sim_threads"]
    agent = get_agent_from_saved_model(save_dir, sim_threads)
    return agent, config
def run_all_ppo_hm_experiments(best_bc_model_paths):
    """Plot PPO_HM training curves, evaluate the models, and pickle the results.

    Args:
        best_bc_model_paths: Mapping whose ``'test'`` entry is passed to
            ``evaluate_all_ppo_hm_models``.

    The performance results are saved to
    ``PPO_DATA_DIR + "ppo_hm_models_performance"``.
    """
    reset_tf()

    seeds = [8355, 5748, 1352, 3325, 8611]
    layouts = ["simple", "unident_s", "random1", "random3"]
    # Each layout's run directory is named "ppo_hm_<layout>".
    ppo_hm_model_paths = {layout: "ppo_hm_" + layout for layout in layouts}

    plot_ppo_hm_training_curves(ppo_hm_model_paths, seeds)

    set_global_seed(124)
    num_rounds = 50
    ppo_hm_performance = evaluate_all_ppo_hm_models(
        ppo_hm_model_paths,
        best_bc_model_paths['test'],
        num_rounds,
        seeds,
        best=True,
    )
    save_pickle(ppo_hm_performance, PPO_DATA_DIR + "ppo_hm_models_performance")


if __name__ == "__main__":
    # Script entry point: load the best BC model paths and run every experiment.
    best_bc_model_paths = load_pickle("data/bc_runs/best_bc_model_paths")
    run_all_ppo_hm_experiments(best_bc_model_paths)
def load_trajectories(filename):
    """Load pickled trajectories from ``filename`` and run them through the evaluator's check.

    ``AgentEvaluator._configure_recipe_if_needed()`` is called before the
    pickle is loaded.

    Returns:
        The loaded trajectories, after ``AgentEvaluator.check_trajectories``
        has been called on them.
    """
    AgentEvaluator._configure_recipe_if_needed()
    trajectories = load_pickle(filename)
    AgentEvaluator.check_trajectories(trajectories)
    return trajectories