def test_running_ppo_bc_train(self):
        # Check model exists and has right params
        layout_name = 'simple'
        best_bc_model_paths = load_pickle(BEST_BC_MODELS_PATH)
        bc_model_path = best_bc_model_paths["train"][layout_name]

        print("LOADING BC MODEL FROM: {}".format(bc_model_path))
        _, bc_params = get_bc_agent_from_saved(bc_model_path)

        expected_bc_params = {'data_params': {'train_mdps': ['simple'], 'ordered_trajs': True, 'human_ai_trajs': False, 'data_path': 'data/human/clean_train_trials.pkl'}, 'mdp_params': {'layout_name': 'simple', 'start_order_list': None}, 'env_params': {'horizon': 400}, 'mdp_fn_params': {}}
        self.assertDictEqual(expected_bc_params, bc_params)

        # Run twice with the same seed and compare the output dicts. Not done as above because of the additional dependency on the human model.

        reset_tf()
        run = ex_ppo.run(config_updates={'LOCAL_TESTING': True, 'layout_name': layout_name, 'OTHER_AGENT_TYPE': 'bc_train', 'SEEDS': [10]})
        train_info0 = run.result[0]

        reset_tf()
        run = ex_ppo.run(config_updates={'LOCAL_TESTING': True, 'layout_name': layout_name, 'OTHER_AGENT_TYPE': 'bc_train', 'SEEDS': [10]})
        train_info1 = run.result[0]

        self.assertDictEqual(train_info0, train_info1)

        # Uncomment to save the current output as the expected output to check against
        # save_pickle(train_info1, 'data/testing/ppo_bc_train_info')

        expected_dict = load_pickle('data/testing/ppo_bc_train_info')
        for k, v in train_info1.items():
            for found_item, expected_item in zip(v, expected_dict[k]):
                self.assertAlmostEqual(found_item, expected_item, places=5)
Example #2
def get_ppo_agent(save_dir, seed=0, best=False):
    save_dir = PPO_DATA_DIR + save_dir + '/seed{}'.format(seed)
    config = load_pickle(save_dir + '/config')
    if best:
        agent = get_agent_from_saved_model(save_dir + "/best", config["sim_threads"])
    else:
        agent = get_agent_from_saved_model(save_dir + "/ppo_agent", config["sim_threads"])
    return agent, config
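
A minimal usage sketch; the run directory name below is hypothetical, and `mdp` is assumed to be an OvercookedGridworld matching the layout the agent was trained on:

# Hypothetical run directory under PPO_DATA_DIR.
agent, config = get_ppo_agent("ppo_sp_simple", seed=0, best=True)
agent.set_mdp(mdp)  # give the agent the mdp it will act in, as in configure_other_agent below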
Example #3
def load_training_data(run_name, seeds=None):
    run_dir = PPO_DATA_DIR + run_name + "/"
    config = load_pickle(run_dir + "config")

    # For backwards compatibility with older configs
    if seeds is None:
        if "NUM_SEEDS" in config.keys():
            seeds = list(range(min(config["NUM_SEEDS"], 5)))
        else:
            seeds = config["SEEDS"]

    train_infos = []
    for seed in seeds:
        train_info = load_pickle(run_dir + "seed{}/training_info".format(seed))
        train_infos.append(train_info)

    return train_infos, config
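
A hedged sketch of aggregating a logged metric across seeds; the run name and metric key are assumptions, since they depend on what the particular run saved:

import numpy as np

# Hypothetical run name and metric key.
train_infos, config = load_training_data("ppo_sp_simple", seeds=[0, 1])
metric = "ep_sparse_rew_mean"
curves = np.array([info[metric] for info in train_infos])  # shape: (num_seeds, num_logged_points)
print(metric, "averaged over seeds:", curves.mean(axis=0))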
Example #4
def load_bc_model_from_path(model_name):
    # NOTE: The lowest-loss and highest-accuracy models
    # were also saved; they can be found in the same dir
    # with special suffixes.
    bc_metadata = load_pickle(BC_SAVE_DIR + model_name + "/bc_metadata")
    bc_params = bc_metadata["bc_params"]
    model = GAIL.load(BC_SAVE_DIR + model_name + "/model")
    return model, bc_params
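
Since the returned model is a stable-baselines GAIL instance, it can be queried directly. A sketch, with a hypothetical model name and `obs` standing in for a featurized observation of the shape this model expects:

# Hypothetical model name; the directory must exist under BC_SAVE_DIR.
model, bc_params = load_bc_model_from_path("my_bc_model")
action, _ = model.predict(obs, deterministic=True)  # standard stable-baselines predict()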
Example #5
 def test_lossless_state_featurization(self):
     trajs = self.env.get_rollouts(self.rnd_agent_pair, num_games=5)
     featurized_observations = [[
         self.base_mdp.lossless_state_encoding(state) for state in ep_states
     ] for ep_states in trajs["ep_observations"]]
     expected_featurization = load_pickle(
         "data/testing/lossless_state_featurization")
     self.assertTrue(
         np.array_equal(expected_featurization, featurized_observations))
Example #6
 def test_state_featurization(self):
     trajs = self.env.get_rollouts(self.greedy_human_model_pair, num_games=5)
     featurized_observations = [[self.base_mdp.featurize_state(state, self.mlam) for state in ep_states] for ep_states in trajs["ep_states"]]
     pickle_path = os.path.join(TESTING_DATA_DIR, "test_state_featurization", 'expected')
     # NOTE: If the featurizations are updated intentionally, you can overwrite the expected
     # featurizations by uncommenting the following line:
     # save_pickle(featurized_observations, pickle_path)
     expected_featurization = load_pickle(pickle_path)
     self.assertTrue(np.array_equal(expected_featurization, featurized_observations))
Example #7
 def test_lossless_state_featurization(self):
     trajs = self.env.get_rollouts(self.rnd_agent_pair, num_games=5)
     featurized_observations = [[
         self.base_mdp.lossless_state_encoding(state) for state in ep_states
     ] for ep_states in trajs["ep_states"]]
     # NOTE: If the featurizations are updated intentionally, you can overwrite the expected
     # featurizations by uncommenting the following line:
     # save_pickle(featurized_observations, "data/testing/lossless_state_featurization")
     expected_featurization = load_pickle(
         "data/testing/lossless_state_featurization")
     self.assertTrue(
         np.array_equal(expected_featurization, featurized_observations))
Example #8
    def test_serialization(self):
        loaded_recipes = []

        # Save and then load every recipe instance
        for i, recipe in enumerate(self.recipes):
            pickle_path = os.path.join(self.pickle_temp_dir, 'recipe_{}'.format(i))
            save_pickle(recipe, pickle_path)
            loaded = load_pickle(pickle_path)
            loaded_recipes.append(loaded)
        
        # Ensure loaded recipes equal corresponding original recipe
        for original, loaded in zip(self.recipes, loaded_recipes):
            self.assertEqual(original, loaded)
Example #9
def load_baselines_model(save_dir, agent_name, config):
    """
    NOTE: Before calling this it might be necessary to clear the tensorflow
    graph if other variables have already been defined
    """
    dummy_env = load_pickle(save_dir + "/dummy_env")
    model, _ = learn(network='conv_and_mlp',
                     env=dummy_env,
                     total_timesteps=0,
                     load_path=save_dir + "/model",
                     scope=agent_name,
                     network_kwargs=config)
    model.dummy_env = dummy_env
    return model
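
As the docstring warns, it may be necessary to clear the TensorFlow graph first. A sketch using the reset_tf() helper seen in the tests above, with a hypothetical save directory:

save_dir = PPO_DATA_DIR + "ppo_sp_simple/seed0"   # hypothetical run/seed directory
config = load_pickle(save_dir + "/config")
reset_tf()  # clear previously defined variables from the default graph
model = load_baselines_model(save_dir, "ppo_agent", config)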
Example #10
    def test_running_ppo_sp(self):
        reset_tf()

        run = ex_ppo.run(config_updates={'LOCAL_TESTING': True, 'layout_name': 'simple', 'OTHER_AGENT_TYPE': 'sp'})
        # Just making sure seeding works correctly and does not change the actual outputs
        train_info = run.result[0]

        # Uncomment to save the current output as the expected output to check against
        # save_pickle(train_info, 'data/testing/ppo_sp_train_info')

        expected_sp_dict = load_pickle('data/testing/ppo_sp_train_info')
        for k, v in train_info.items():
            for found_item, expected_item in zip(v, expected_sp_dict[k]):
                self.assertAlmostEqual(found_item, expected_item, places=5)
Example #11
def configure_other_agent(params, gym_env, mlp, mdp):
    if params["OTHER_AGENT_TYPE"] == "hm":
        hl_br, hl_temp, ll_br, ll_temp = params["HM_PARAMS"]
        agent = GreedyHumanModel(mlp, hl_boltzmann_rational=hl_br, hl_temp=hl_temp, ll_boltzmann_rational=ll_br, ll_temp=ll_temp)
        gym_env.use_action_method = True

    elif params["OTHER_AGENT_TYPE"][:2] == "bc":
        best_bc_model_paths = load_pickle(BEST_BC_MODELS_PATH)
        if params["OTHER_AGENT_TYPE"] == "bc_train":
            bc_model_path = best_bc_model_paths["train"][mdp.layout_name]
        elif params["OTHER_AGENT_TYPE"] == "bc_test":
            bc_model_path = best_bc_model_paths["test"][mdp.layout_name]
        else:
            raise ValueError("Other agent type must be bc train or bc test")

        print("LOADING BC MODEL FROM: {}".format(bc_model_path))
        agent, bc_params = get_bc_agent_from_saved(bc_model_path)
        gym_env.use_action_method = True
        # Make sure environment params are the same in PPO as in the BC model
        for k, v in bc_params["env_params"].items():
            assert v == params["env_params"][k], "{} did not match. env_params: {} \t PPO params: {}".format(k, v, params[k])
        for k, v in bc_params["mdp_params"].items():
            assert v == params["mdp_params"][k], "{} did not match. mdp_params: {} \t PPO params: {}".format(k, v, params[k])

    elif params["OTHER_AGENT_TYPE"] == "rnd":
        agent = RandomAgent()

    elif params["OTHER_AGENT_TYPE"] == "sp":
        gym_env.self_play_randomization = 1
    elif params["OTHER_AGENT_TYPE"] == "sampling_sp":
        agent = RandomAgent() # just a placeholder, will be replaced in the training loop
        gym_env.self_play_randomization = 0.0 # 0.0 means no pure self-play here; the partner slot is always filled by the sampled agent (sampling_sp)
        # gym_env.use_action_method = True

    else:
        raise ValueError("unknown type of agent to match with")
        
    if params["OTHER_AGENT_TYPE"] != "sp":
        assert mlp.mdp == mdp
        agent.set_mdp(mdp)
        gym_env.other_agent = agent
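
A hedged sketch of exercising this dispatch; `params`, `gym_env`, `mlp`, and `mdp` are assumed to come from the surrounding PPO experiment setup and are not constructed here:

# Hypothetical: pair PPO with a fixed behavior-cloned training partner.
params["OTHER_AGENT_TYPE"] = "bc_train"
configure_other_agent(params, gym_env, mlp, mdp)
assert gym_env.other_agent is not None  # the BC partner is now embedded in the env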
Example #12
def eval_with_standard_baselines(n_games, model_name, display=False):
    """Method to evaluate agent performance with stable-baselines infrastructure,
    just to make sure everything is compatible and integrating correctly."""
    bc_metadata = load_pickle(BC_SAVE_DIR + model_name + "/bc_metadata")
    bc_params = bc_metadata["bc_params"]
    model = GAIL.load(BC_SAVE_DIR + model_name + "/model")

    gym_env = init_gym_env(bc_params)

    tot_rew = 0
    for i in tqdm.trange(n_games):
        obs, _ = gym_env.reset()
        done = False
        while not done:
            ob0, ob1 = obs
            a0 = stable_baselines_predict_fn(model, ob0)
            a1 = stable_baselines_predict_fn(model, ob1)
            joint_action = (a0, a1)
            (obs, _), rewards, done, info = gym_env.step(joint_action)
            tot_rew += rewards

    print("avg reward", tot_rew / n_games)
    return tot_rew / n_games
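
Example invocation; the model name is hypothetical and just has to be a directory under BC_SAVE_DIR containing 'bc_metadata' and 'model':

avg_rew = eval_with_standard_baselines(10, "bc_train_simple", display=False)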
Example #13
 def load_trajectories(filename):
     trajs = load_pickle(filename)
     AgentEvaluator.check_trajectories(trajs)
     return trajs
Example #14
def load_ppo_agent(save_dir, config_dir):
    # save_dir = PPO_DATA_DIR + save_dir + '/seed{}'.format(seed)
    config = load_pickle(config_dir + '/config')
    agent = get_agent_from_saved_model(save_dir, config["sim_threads"])
    return agent, config
Example #15

def run_all_ppo_hm_experiments(best_bc_model_paths):
    reset_tf()

    seeds = [8355, 5748, 1352, 3325, 8611]

    ppo_hm_model_paths = {
        "simple": "ppo_hm_simple",
        "unident_s": "ppo_hm_unident_s",
        "random1": "ppo_hm_random1",
        "random3": "ppo_hm_random3"
    }

    plot_ppo_hm_training_curves(ppo_hm_model_paths, seeds)

    set_global_seed(124)
    num_rounds = 50
    ppo_hm_performance = evaluate_all_ppo_hm_models(
        ppo_hm_model_paths,
        best_bc_model_paths['test'],
        num_rounds,
        seeds,
        best=True)
    save_pickle(ppo_hm_performance, PPO_DATA_DIR + "ppo_hm_models_performance")


if __name__ == "__main__":
    best_bc_model_paths = load_pickle("data/bc_runs/best_bc_model_paths")
    run_all_ppo_hm_experiments(best_bc_model_paths)
Example #16
 def load_trajectories(filename):
     AgentEvaluator._configure_recipe_if_needed()
     trajs = load_pickle(filename)
     AgentEvaluator.check_trajectories(trajs)
     return trajs