def run_pbt(params):
    """Run PBT training once per seed, each run saving into its own seed subdirectory."""
    create_dir_if_not_exists(params["SAVE_DIR"])
    save_dict_to_file(params, params["SAVE_DIR"] + "config")
    for seed in params["SEEDS"]:
        set_global_seed(seed)
        curr_seed_params = params.copy()
        curr_seed_params["SAVE_DIR"] += "seed_{}/".format(seed)
        pbt_one_run(curr_seed_params, seed)
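# Usage sketch for run_pbt. The function itself only reads "SAVE_DIR" and "SEEDS";
# all remaining hyperparameters are consumed downstream by pbt_one_run, so any keys
# beyond those two are hypothetical placeholders, not the actual config schema.
def example_run_pbt():
    params = {
        "SAVE_DIR": "data/pbt_runs/my_experiment/",  # must end in "/" since "seed_{}/" is appended
        "SEEDS": [8015, 3554],                       # one PBT run is launched per seed
        # ... whatever additional keys pbt_one_run expects go here ...
    }
    run_pbt(params)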
def train_bc_models(all_params, seeds):
    """Train len(seeds) BC models for each layout, on both the train and test human data splits."""
    for params in all_params:
        for seed_idx, seed in enumerate(seeds):
            set_global_seed(seed)

            # BC model fit to the human-human training data
            model = train_bc_agent_from_hh_data(agent_name="bc_train_seed{}".format(seed_idx), model='train', **params)
            plot_bc_run(model.bc_info, params['num_epochs'])

            # BC model fit to the human-human test data
            model = train_bc_agent_from_hh_data(agent_name="bc_test_seed{}".format(seed_idx), model='test', **params)
            plot_bc_run(model.bc_info, params['num_epochs'])

            reset_tf()
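# Minimal sketch of calling train_bc_models directly. The per-layout parameter dicts
# mirror the ones built in run_all_bc_experiments below; the seed subset is illustrative.
def example_train_bc_models():
    seeds = [5415, 2652]  # subset of the seeds used in run_all_bc_experiments
    all_params = [
        {"layout_name": "simple", "num_epochs": 100, "lr": 1e-3, "adam_eps": 1e-8},
        {"layout_name": "random1", "num_epochs": 120, "lr": 1e-3, "adam_eps": 1e-8},
    ]
    train_bc_models(all_params, seeds)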
def ppo_run(params):
    create_dir_if_not_exists(params["SAVE_DIR"])
    save_pickle(params, params["SAVE_DIR"] + "config")

    #############
    # PPO SETUP #
    #############

    train_infos = []

    for seed in params["SEEDS"]:
        reset_tf()
        set_global_seed(seed)

        curr_seed_dir = params["SAVE_DIR"] + "seed" + str(seed) + "/"
        create_dir_if_not_exists(curr_seed_dir)
        save_pickle(params, curr_seed_dir + "config")

        print("Creating env with params", params)

        # Configure mdp
        mdp = OvercookedGridworld.from_layout_name(**params["mdp_params"])
        env = OvercookedEnv(mdp, **params["env_params"])
        mlp = MediumLevelPlanner.from_pickle_or_compute(mdp, NO_COUNTERS_PARAMS, force_compute=True)

        # Configure gym env
        gym_env = get_vectorized_gym_env(
            env, 'Overcooked-v0', featurize_fn=lambda x: mdp.lossless_state_encoding(x), **params
        )
        gym_env.self_play_randomization = 0 if params["SELF_PLAY_HORIZON"] is None else 1
        gym_env.trajectory_sp = params["TRAJECTORY_SELF_PLAY"]
        gym_env.update_reward_shaping_param(1 if params["mdp_params"]["rew_shaping_params"] != 0 else 0)

        configure_other_agent(params, gym_env, mlp, mdp)

        # Create model
        with tf.device('/device:GPU:{}'.format(params["GPU_ID"])):
            model = create_model(gym_env, "ppo_agent", **params)

        # Train model
        params["CURR_SEED"] = seed
        train_info = update_model(gym_env, model, **params)

        # Save model
        save_ppo_model(model, curr_seed_dir + model.agent_name)
        print("Saved training info at", curr_seed_dir + "training_info")
        save_pickle(train_info, curr_seed_dir + "training_info")
        train_infos.append(train_info)

    return train_infos
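# Usage sketch for ppo_run, listing only the keys that ppo_run itself dereferences.
# create_model and update_model consume further keys via **params, so this dict is
# deliberately incomplete; the values below are illustrative assumptions, not defaults.
def example_ppo_run():
    params = {
        "SAVE_DIR": "data/ppo_runs/my_experiment/",
        "SEEDS": [2229],
        "GPU_ID": 0,
        "SELF_PLAY_HORIZON": None,       # None disables self-play randomization
        "TRAJECTORY_SELF_PLAY": False,
        "mdp_params": {"layout_name": "simple", "rew_shaping_params": 0},
        "env_params": {"horizon": 400},  # hypothetical env parameter
        # ... plus whatever create_model and update_model expect ...
    }
    return ppo_run(params)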
def run_all_ppo_hm_experiments(best_bc_model_paths):
    reset_tf()

    seeds = [8355, 5748, 1352, 3325, 8611]

    ppo_hm_model_paths = {
        "simple": "ppo_hm_simple",
        "unident_s": "ppo_hm_unident_s",
        "random1": "ppo_hm_random1",
        "random3": "ppo_hm_random3"
    }

    plot_ppo_hm_training_curves(ppo_hm_model_paths, seeds)

    set_global_seed(124)
    num_rounds = 50
    ppo_hm_performance = evaluate_all_ppo_hm_models(
        ppo_hm_model_paths, best_bc_model_paths['test'], num_rounds, seeds, best=True)
    save_pickle(ppo_hm_performance, PPO_DATA_DIR + "ppo_hm_models_performance")
def run_all_ppo_bc_experiments(best_bc_model_paths):
    reset_tf()

    seeds = {
        "bc_train": [9456, 1887, 5578, 5987, 516],
        "bc_test": [2888, 7424, 7360, 4467, 184]
    }

    ppo_bc_model_paths = {
        'bc_train': {
            "simple": "ppo_bc_train_simple",
            "unident_s": "ppo_bc_train_unident_s",
            "random1": "ppo_bc_train_random1",
            "random0": "ppo_bc_train_random0",
            "random3": "ppo_bc_train_random3"
        },
        'bc_test': {
            "simple": "ppo_bc_test_simple",
            "unident_s": "ppo_bc_test_unident_s",
            "random1": "ppo_bc_test_random1",
            "random0": "ppo_bc_test_random0",
            "random3": "ppo_bc_test_random3"
        }
    }

    plot_runs_training_curves(ppo_bc_model_paths, seeds, save=True)

    set_global_seed(248)
    num_rounds = 100
    ppo_bc_performance = evaluate_all_ppo_bc_models(ppo_bc_model_paths, best_bc_model_paths, num_rounds, seeds, best=True)
    ppo_bc_performance = prepare_nested_default_dict_for_pickle(ppo_bc_performance)
    save_pickle(ppo_bc_performance, PPO_DATA_DIR + "ppo_bc_models_performance")
def setUp(self):
    set_global_seed(0)

    self.bc_params = get_bc_params(**{"data_path": DUMMY_2019_CLEAN_HUMAN_DATA_PATH})
    self.bc_params["mdp_params"]["layout_name"] = "cramped_room"
    self.bc_params["training_params"]["epochs"] = 1
    self.model_dir = os.path.join(BC_SAVE_DIR, "test_model")

    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)

    processed_trajs, _ = get_trajs_from_data(**self.bc_params["data_params"], silent=True)
    self.dummy_input = np.vstack(processed_trajs["ep_states"])[:1, :]
    self.initial_states = [
        np.zeros((1, self.bc_params['cell_size'])),
        np.zeros((1, self.bc_params['cell_size']))
    ]

    with open(BC_EXPECTED_DATA_PATH, "rb") as f:
        self.expected = pickle.load(f)

    # Disable TF warnings and infos
    tf.get_logger().setLevel('ERROR')
def run_all_ppo_sp_experiments(best_bc_model_paths):
    reset_tf()

    seeds = [2229, 7649, 7225, 9807, 386]

    ppo_sp_model_paths = {
        "simple": "ppo_sp_simple",
        "unident_s": "ppo_sp_unident_s",
        "random1": "ppo_sp_random1",
        "random0": "ppo_sp_random0",
        "random3": "ppo_sp_random3"
    }

    plot_ppo_sp_training_curves(ppo_sp_model_paths, seeds, save=True)

    set_global_seed(124)
    num_rounds = 100
    ppo_sp_performance = evaluate_all_sp_ppo_models(
        ppo_sp_model_paths, best_bc_model_paths['test'], num_rounds, seeds, best=True)
    save_pickle(ppo_sp_performance, PPO_DATA_DIR + "ppo_sp_models_performance")
def run_all_pbt_experiments(best_bc_model_paths):
    # best_bc_models = load_pickle("data/bc_runs/best_bc_models")

    seeds = [8015, 3554, 581, 5608, 4221]

    pbt_model_paths = {
        "simple": "pbt_simple",
        "unident_s": "pbt_unident_s",
        "random1": "pbt_random1",
        "random3": "pbt_random3",
        "random0": "pbt_random0"
    }

    # Plotting
    plot_pbt_runs(pbt_model_paths, seeds, save=True)

    # Evaluating
    set_global_seed(512)
    num_rounds = 100
    pbt_performance = evaluate_all_pbt_models(pbt_model_paths, best_bc_model_paths, num_rounds, seeds, best=False)
    save_pickle(pbt_performance, PBT_DATA_DIR + "pbt_performance")
def setUp(self):
    reset_tf()
    set_global_seed(0)
def run_all_bc_experiments():
    # Train BC models
    seeds = [5415, 2652, 6440, 1965, 6647]
    num_seeds = len(seeds)

    params_unident = {"layout_name": "unident_s", "num_epochs": 120, "lr": 1e-3, "adam_eps": 1e-8}
    params_simple = {"layout_name": "simple", "num_epochs": 100, "lr": 1e-3, "adam_eps": 1e-8}
    params_random1 = {"layout_name": "random1", "num_epochs": 120, "lr": 1e-3, "adam_eps": 1e-8}
    params_random0 = {"layout_name": "random0", "num_epochs": 90, "lr": 1e-3, "adam_eps": 1e-8}
    params_random3 = {"layout_name": "random3", "num_epochs": 110, "lr": 1e-3, "adam_eps": 1e-8}
    all_params = [params_simple, params_random1, params_unident, params_random0, params_random3]

    train_bc_models(all_params, seeds)

    # Evaluate BC models
    set_global_seed(64)
    num_rounds = 100
    bc_models_evaluation = evaluate_all_bc_models(all_params, num_rounds, num_seeds)
    save_pickle(bc_models_evaluation, BC_MODELS_EVALUATION_PATH)
    print("All BC models evaluation: ", bc_models_evaluation)

    # These models were selected manually to roughly match in performance
    # (the test BC model should be slightly better than the train BC model)
    selected_models = {
        "simple": [0, 1],
        "unident_s": [0, 0],
        "random1": [4, 2],
        "random0": [2, 1],
        "random3": [3, 3]
    }

    final_bc_model_paths = {"train": {}, "test": {}}
    for layout_name, seed_indices in selected_models.items():
        train_idx, test_idx = seed_indices
        final_bc_model_paths["train"][layout_name] = "{}_bc_train_seed{}".format(layout_name, train_idx)
        final_bc_model_paths["test"][layout_name] = "{}_bc_test_seed{}".format(layout_name, test_idx)

    best_bc_models_performance = evaluate_bc_models(final_bc_model_paths, num_rounds)
    save_pickle(best_bc_models_performance, BC_SAVE_DIR + "best_bc_models_performance")

# Automatic selection of the best BC models. It caused imbalances that made the results
# harder to interpret, so it is better to select models (possibly non-best ones) manually.
# def select_bc_models(bc_models_evaluation, num_rounds, num_seeds):
#     best_bc_model_paths = {"train": {}, "test": {}}
#     for layout_name, layout_eval_dict in bc_models_evaluation.items():
#         for model_type, seed_eval_dict in layout_eval_dict.items():
#             best_seed = np.argmax([seed_eval_dict[i] for i in range(num_seeds)])
#             best_bc_model_paths[model_type][layout_name] = "{}_bc_{}_seed{}".format(layout_name, model_type, best_seed)
#     save_pickle(best_bc_model_paths, BEST_BC_MODELS_PATH)
#     return best_bc_model_paths
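# End-to-end sketch of how the run_all_* drivers in this file chain together: BC models
# are trained and evaluated first, and the resulting best-model paths feed the PBT and
# PPO evaluations. Loading from BEST_BC_MODELS_PATH is an assumption (that pickle is
# written by the commented-out select_bc_models above); point load_pickle at wherever
# your selected final_bc_model_paths dict was actually saved.
def run_full_experiment_suite():
    run_all_bc_experiments()
    best_bc_model_paths = load_pickle(BEST_BC_MODELS_PATH)  # {"train": {...}, "test": {...}}
    run_all_pbt_experiments(best_bc_model_paths)
    run_all_ppo_bc_experiments(best_bc_model_paths)
    run_all_ppo_sp_experiments(best_bc_model_paths)
    run_all_ppo_hm_experiments(best_bc_model_paths)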