def run_all_pbt_experiments(best_bc_model_paths):
    """Plot the PBT runs, evaluate all PBT models and pickle the results.

    Args:
        best_bc_model_paths: forwarded unchanged to
            ``evaluate_all_pbt_models`` (presumably the selected BC model
            paths -- confirm against the caller).
    """
    # best_bc_models = load_pickle("data/bc_runs/best_bc_models")
    training_seeds = [8015, 3554, 581, 5608, 4221]

    # Every model path is simply the layout name with a "pbt_" prefix.
    layouts = ("simple", "unident_s", "random1", "random3", "random0")
    model_paths = {layout: "pbt_" + layout for layout in layouts}

    # Plotting
    plot_pbt_runs(model_paths, training_seeds, save=True)

    # Evaluating (the global seed is fixed right before evaluation)
    set_global_seed(512)
    rounds = 100
    performance = evaluate_all_pbt_models(
        model_paths, best_bc_model_paths, rounds, training_seeds, best=False
    )
    save_pickle(performance, PBT_DATA_DIR + "pbt_performance")
def save_trajectories(trajectories, filename):
    """Validate ``trajectories`` and pickle them to ``filename``.

    A warning is printed (once) when at least one trajectory carries a
    non-None ``env_params["start_state_fn"]``.
    """
    AgentEvaluator.check_trajectories(trajectories)

    # Stop at the first trajectory with a custom start state: one warning
    # is enough, and later trajectories need not be inspected.
    for trajectory in trajectories:
        if trajectory["env_params"]["start_state_fn"] is not None:
            print(
                "Saving trajectories with a custom start state. This can currently "
                "cause things to break when loading in the trajectories."
            )
            break

    save_pickle(trajectories, filename)
def save_bc_model(model_save_dir, model, bc_params):
    """Save a BC model and its metadata under ``BC_SAVE_DIR + model_save_dir``.

    The model is written via ``model.save`` to ``<prefix>model`` and a
    dict holding ``bc_params`` and ``model.bc_info`` is pickled to
    ``<prefix>bc_metadata``.
    """
    # Plain string prefix: the file names below are appended without a
    # separator, so callers are expected to supply any trailing slash.
    prefix = BC_SAVE_DIR + model_save_dir

    # NOTE: the message is printed before the save actually happens.
    print("Saved BC model at", prefix)
    model.save(prefix + "model")

    save_pickle(
        {"bc_params": bc_params, "train_info": model.bc_info},
        prefix + "bc_metadata",
    )
def test_serialization(self):
    """Check that every recipe survives a pickle save/load round trip."""

    def round_trip(index, recipe):
        # Save the recipe to its own file in the temp dir, then read it back.
        path = os.path.join(self.pickle_temp_dir, 'recipe_{}'.format(index))
        save_pickle(recipe, path)
        return load_pickle(path)

    # Phase 1: round-trip every recipe instance before asserting anything.
    restored = [
        round_trip(index, recipe) for index, recipe in enumerate(self.recipes)
    ]

    # Phase 2: each restored recipe must equal its original counterpart.
    for expected, actual in zip(self.recipes, restored):
        self.assertEqual(expected, actual)
def save_baselines_model(model, save_dir):
    """
    Saves a Model (from baselines) into the `save_dir/model` file and
    pickles a stand-in environment to `save_dir/dummy_env`.

    NOTE: Overwrites previously saved models at the location
    """
    create_dir_if_not_exists(save_dir)
    model.save(save_dir + "/model")

    # A dummy env is stored next to the model so that loading it later does
    # not require a real env: only these three parameters are taken from
    # the env at test time (if no training happens).
    source_env = model.dummy_env
    stand_in_env = DummyEnv(
        source_env.num_envs,
        source_env.observation_space,
        source_env.action_space,
    )
    save_pickle(stand_in_env, save_dir + "/dummy_env")
def run_all_bc_experiments():
    """Train the BC models, evaluate them, and pickle the evaluation results.

    All models are evaluated first; then a hand-picked (train, test) pair
    per layout is evaluated again and saved separately.
    """
    # Train BC models
    training_seeds = [5415, 2652, 6440, 1965, 6647]
    seed_count = len(training_seeds)

    # (layout, epochs) pairs; learning rate and Adam epsilon are shared.
    layout_epochs = [
        ("simple", 100),
        ("random1", 120),
        ("unident_s", 120),
        ("random0", 90),
        ("random3", 110),
    ]
    all_params = [
        {"layout_name": layout, "num_epochs": epochs, "lr": 1e-3, "adam_eps": 1e-8}
        for layout, epochs in layout_epochs
    ]
    train_bc_models(all_params, training_seeds)

    # Evaluate BC models
    set_global_seed(64)
    rounds = 100
    evaluation = evaluate_all_bc_models(all_params, rounds, seed_count)
    save_pickle(evaluation, BC_MODELS_EVALUATION_PATH)
    print("All BC models evaluation: ", evaluation)

    # These models have been manually selected to more or less match in performance,
    # (test BC model should be a bit better than the train BC model).
    # Each entry is [train seed index, test seed index].
    selected_models = {
        "simple": [0, 1],
        "unident_s": [0, 0],
        "random1": [4, 2],
        "random0": [2, 1],
        "random3": [3, 3],
    }
    final_bc_model_paths = {
        "train": {
            layout: "{}_bc_train_seed{}".format(layout, picks[0])
            for layout, picks in selected_models.items()
        },
        "test": {
            layout: "{}_bc_test_seed{}".format(layout, picks[1])
            for layout, picks in selected_models.items()
        },
    }

    best_performance = evaluate_bc_models(final_bc_model_paths, rounds)
    save_pickle(best_performance, BC_SAVE_DIR + "best_bc_models_performance")
def run_all_ppo_hm_experiments(best_bc_model_paths):
    """Plot the PPO_HM training curves, evaluate the models, pickle the results.

    Args:
        best_bc_model_paths: mapping whose ``'test'`` entry is handed to
            ``evaluate_all_ppo_hm_models``.
    """
    reset_tf()

    training_seeds = [8355, 5748, 1352, 3325, 8611]
    layouts = ("simple", "unident_s", "random1", "random3")
    model_paths = {layout: "ppo_hm_" + layout for layout in layouts}

    plot_ppo_hm_training_curves(model_paths, training_seeds)

    # Fix the global seed right before evaluation.
    set_global_seed(124)
    rounds = 50
    test_bc_paths = best_bc_model_paths['test']
    performance = evaluate_all_ppo_hm_models(
        model_paths, test_bc_paths, rounds, training_seeds, best=True
    )
    save_pickle(performance, PPO_DATA_DIR + "ppo_hm_models_performance")
def run_all_ppo_bc_experiments(best_bc_model_paths):
    """Plot the PPO_BC training curves, evaluate the models, pickle the results.

    Args:
        best_bc_model_paths: forwarded unchanged to
            ``evaluate_all_ppo_bc_models``.
    """
    reset_tf()

    seeds = {
        "bc_train": [9456, 1887, 5578, 5987, 516],
        "bc_test": [2888, 7424, 7360, 4467, 184],
    }

    # Paths follow the pattern "ppo_<bc kind>_<layout>" for both BC kinds.
    layouts = ("simple", "unident_s", "random1", "random0", "random3")
    model_paths = {
        bc_kind: {
            layout: "ppo_{}_{}".format(bc_kind, layout) for layout in layouts
        }
        for bc_kind in ("bc_train", "bc_test")
    }

    plot_runs_training_curves(model_paths, seeds, save=True)

    # Fix the global seed right before evaluation.
    set_global_seed(248)
    rounds = 100
    raw_performance = evaluate_all_ppo_bc_models(
        model_paths, best_bc_model_paths, rounds, seeds, best=True
    )
    # Convert the result with the project helper before pickling it.
    picklable_performance = prepare_nested_default_dict_for_pickle(raw_performance)
    save_pickle(picklable_performance, PPO_DATA_DIR + "ppo_bc_models_performance")
def run_all_ppo_sp_experiments(best_bc_model_paths):
    """Plot the PPO_SP training curves, evaluate the models, pickle the results.

    Args:
        best_bc_model_paths: mapping whose ``'test'`` entry is handed to
            ``evaluate_all_sp_ppo_models``.
    """
    reset_tf()

    training_seeds = [2229, 7649, 7225, 9807, 386]
    layouts = ("simple", "unident_s", "random1", "random0", "random3")
    model_paths = {layout: "ppo_sp_" + layout for layout in layouts}

    plot_ppo_sp_training_curves(model_paths, training_seeds, save=True)

    # Fix the global seed right before evaluation.
    set_global_seed(124)
    rounds = 100
    test_bc_paths = best_bc_model_paths['test']
    performance = evaluate_all_sp_ppo_models(
        model_paths, test_bc_paths, rounds, training_seeds, best=True
    )
    save_pickle(performance, PPO_DATA_DIR + "ppo_sp_models_performance")
def ppo_run(params):
    """Train and save one PPO model for every seed in ``params["SEEDS"]``.

    The config is pickled once in ``params["SAVE_DIR"]`` and once more in
    each per-seed sub-directory, alongside the trained model and its
    training info.

    NOTE: ``params`` is mutated -- ``params["CURR_SEED"]`` is set to the
    seed currently being trained.

    Returns:
        A list with the value returned by ``update_model`` for each seed,
        in the order of ``params["SEEDS"]``.
    """
    create_dir_if_not_exists(params["SAVE_DIR"])
    save_pickle(params, params["SAVE_DIR"] + "config")

    #############
    # PPO SETUP #
    #############

    collected_infos = []

    for seed in params["SEEDS"]:
        # Start every seed from a reset TF state and a fixed global seed.
        reset_tf()
        set_global_seed(seed)

        seed_dir = params["SAVE_DIR"] + "seed" + str(seed) + "/"
        create_dir_if_not_exists(seed_dir)
        save_pickle(params, seed_dir + "config")

        print("Creating env with params", params)

        # Configure mdp
        mdp = OvercookedGridworld.from_layout_name(**params["mdp_params"])
        base_env = OvercookedEnv(mdp, **params["env_params"])
        planner = MediumLevelPlanner.from_pickle_or_compute(
            mdp, NO_COUNTERS_PARAMS, force_compute=True
        )

        # Configure gym env
        gym_env = get_vectorized_gym_env(
            base_env,
            'Overcooked-v0',
            featurize_fn=lambda state: mdp.lossless_state_encoding(state),
            **params
        )

        if params["SELF_PLAY_HORIZON"] is None:
            gym_env.self_play_randomization = 0
        else:
            gym_env.self_play_randomization = 1
        gym_env.trajectory_sp = params["TRAJECTORY_SELF_PLAY"]

        # The shaping parameter is 1 unless the configured value equals 0.
        if params["mdp_params"]["rew_shaping_params"] != 0:
            shaping_param = 1
        else:
            shaping_param = 0
        gym_env.update_reward_shaping_param(shaping_param)

        configure_other_agent(params, gym_env, planner, mdp)

        # Create model
        device_name = '/device:GPU:{}'.format(params["GPU_ID"])
        with tf.device(device_name):
            model = create_model(gym_env, "ppo_agent", **params)

        # Train model
        params["CURR_SEED"] = seed
        seed_info = update_model(gym_env, model, **params)

        # Save model
        save_ppo_model(model, seed_dir + model.agent_name)
        info_path = seed_dir + "training_info"
        print("Saved training info at", info_path)
        save_pickle(seed_info, info_path)
        collected_infos.append(seed_info)

    return collected_infos
def save_trajectory(trajectory, filename):
    """Validate ``trajectory`` and pickle it to ``filename``.

    Validation is delegated to ``AgentEvaluator.check_trajectories``, which
    runs before anything is written.
    """
    AgentEvaluator.check_trajectories(trajectory)
    save_pickle(
        trajectory,
        filename,
    )
def run_all_bc_experiments():
    """Train the BC models, evaluate them, and pickle the evaluation results.

    All models are evaluated first; then a hand-picked (train, test) pair
    per layout is evaluated again and saved separately.
    """
    # Train BC models
    training_seeds = [5415, 2652, 6440, 1965, 6647]
    seed_count = len(training_seeds)

    # (layout, epochs) pairs; learning rate and Adam epsilon are shared.
    layout_epochs = [
        ("simple", 100),
        ("random1", 120),
        ("unident_s", 120),
        ("random0", 90),
        ("random3", 110),
    ]
    all_params = [
        {"layout_name": layout, "num_epochs": epochs, "lr": 1e-3, "adam_eps": 1e-8}
        for layout, epochs in layout_epochs
    ]
    train_bc_models(all_params, training_seeds)

    # Evaluate BC models
    set_global_seed(64)
    rounds = 100
    evaluation = evaluate_all_bc_models(all_params, rounds, seed_count)
    save_pickle(evaluation, BC_MODELS_EVALUATION_PATH)
    print("All BC models evaluation: ", evaluation)

    # These models have been manually selected to more or less match in performance,
    # (test BC model should be a bit better than the train BC model).
    # Each entry is [train seed index, test seed index].
    selected_models = {
        "simple": [0, 1],
        "unident_s": [0, 0],
        "random1": [4, 2],
        "random0": [2, 1],
        "random3": [3, 3],
    }
    final_bc_model_paths = {
        "train": {
            layout: "{}_bc_train_seed{}".format(layout, picks[0])
            for layout, picks in selected_models.items()
        },
        "test": {
            layout: "{}_bc_test_seed{}".format(layout, picks[1])
            for layout, picks in selected_models.items()
        },
    }

    best_performance = evaluate_bc_models(final_bc_model_paths, rounds)
    save_pickle(best_performance, BC_SAVE_DIR + "best_bc_models_performance")


# Automatic selection of best BC models. Caused imbalances that made interpretation of results more difficult,
# better to select manually non-best ones.
# def select_bc_models(bc_models_evaluation, num_rounds, num_seeds): # best_bc_model_paths = { "train": {}, "test": {} } # for layout_name, layout_eval_dict in bc_models_evaluation.items(): # for model_type, seed_eval_dict in layout_eval_dict.items(): # best_seed = np.argmax([seed_eval_dict[i] for i in range(num_seeds)]) # best_bc_model_paths[model_type][layout_name] = "{}_bc_{}_seed{}".format(layout_name, model_type, best_seed) # save_pickle(best_bc_model_paths, BEST_BC_MODELS_PATH) # return best_bc_model_paths