예제 #1
0
def run_pbt(params):
    """Launch one PBT run per seed, each writing to its own sub-directory.

    The directory named by ``params["SAVE_DIR"]`` is created and ``params``
    is saved there under the name "config". Then, for every seed in
    ``params["SEEDS"]``, the global seed is set and ``pbt_one_run`` is called
    with a shallow copy of ``params`` whose "SAVE_DIR" has "seed_<seed>/"
    appended.

    Args:
        params: dict that provides at least the "SAVE_DIR" (string prefix
            ending in a path separator) and "SEEDS" (iterable) entries.
    """
    base_dir = params["SAVE_DIR"]
    create_dir_if_not_exists(base_dir)
    save_dict_to_file(params, base_dir + "config")

    for run_seed in params["SEEDS"]:
        set_global_seed(run_seed)
        # Work on a shallow copy so the caller's "SAVE_DIR" entry is untouched.
        seed_params = params.copy()
        seed_params["SAVE_DIR"] = seed_params["SAVE_DIR"] + "seed_{}/".format(run_seed)
        pbt_one_run(seed_params, run_seed)
예제 #2
0
def train_bc_models(all_params, seeds):
    """Train a "train" and a "test" BC model for every (layout params, seed) pair.

    For each params dict and each seed, the global seed is set once, then the
    'train' model and the 'test' model are trained in that order and each
    one's ``bc_info`` is handed to ``plot_bc_run``. ``reset_tf`` is called
    after both models of a seed are done.

    Args:
        all_params: iterable of keyword-argument dicts forwarded to
            ``train_bc_agent_from_hh_data``; each must contain 'num_epochs'.
        seeds: sequence of seeds. Agent names embed the seed's *index* in
            this sequence, not the seed value itself.
    """
    # (agent-name template, model kind), listed in training order.
    variants = (("bc_train_seed{}", 'train'), ("bc_test_seed{}", 'test'))

    for layout_params in all_params:
        for idx, seed in enumerate(seeds):
            set_global_seed(seed)
            for name_template, kind in variants:
                bc_model = train_bc_agent_from_hh_data(
                    agent_name=name_template.format(idx), model=kind, **layout_params
                )
                plot_bc_run(bc_model.bc_info, layout_params['num_epochs'])
            reset_tf()
예제 #3
0
def ppo_run(params):
    """Train one PPO agent per seed and return the per-seed training infos.

    The save directory ``params["SAVE_DIR"]`` is created and ``params`` is
    pickled there as "config". For each seed in ``params["SEEDS"]`` a
    "seed<seed>/" sub-directory is created, an Overcooked mdp/env/planner and
    a vectorized gym env are built, a model is created and trained, and the
    model plus its training info are saved in the seed directory.

    Args:
        params: dict of run settings. Keys read directly here are "SAVE_DIR",
            "SEEDS", "mdp_params" (including its "rew_shaping_params"),
            "env_params", "SELF_PLAY_HORIZON", "TRAJECTORY_SELF_PLAY" and
            "GPU_ID". The whole dict is also unpacked as keyword arguments
            into ``get_vectorized_gym_env``, ``create_model`` and
            ``update_model``.

    Returns:
        list with the value returned by ``update_model`` for each seed, in
        the order of ``params["SEEDS"]``.

    Side effects:
        ``params`` is mutated in place: "CURR_SEED" is overwritten with the
        current seed on every iteration and keeps the last seed on return.
    """

    create_dir_if_not_exists(params["SAVE_DIR"])
    save_pickle(params, params["SAVE_DIR"] + "config")

    #############
    # PPO SETUP #
    #############

    train_infos = []

    for seed in params["SEEDS"]:
        # Reset and re-seed at the start of every seed's run, before any
        # environment or model is built.
        reset_tf()
        set_global_seed(seed)

        curr_seed_dir = params["SAVE_DIR"] + "seed" + str(seed) + "/"
        create_dir_if_not_exists(curr_seed_dir)

        # NOTE(review): this per-seed config is saved before
        # params["CURR_SEED"] is updated below, so it appears to hold the
        # previous iteration's "CURR_SEED" (or whatever the caller passed in
        # on the first iteration) - confirm whether that is intended.
        save_pickle(params, curr_seed_dir + "config")

        print("Creating env with params", params)
        # Configure mdp
        
        mdp = OvercookedGridworld.from_layout_name(**params["mdp_params"])
        env = OvercookedEnv(mdp, **params["env_params"])
        mlp = MediumLevelPlanner.from_pickle_or_compute(mdp, NO_COUNTERS_PARAMS, force_compute=True) 

        # Configure gym env
        gym_env = get_vectorized_gym_env(
            env, 'Overcooked-v0', featurize_fn=lambda x: mdp.lossless_state_encoding(x), **params
        )
        # Flag is 1 whenever a self-play horizon is configured, 0 otherwise.
        gym_env.self_play_randomization = 0 if params["SELF_PLAY_HORIZON"] is None else 1
        gym_env.trajectory_sp = params["TRAJECTORY_SELF_PLAY"]
        # Passes 1 when "rew_shaping_params" is anything other than 0, else 0.
        gym_env.update_reward_shaping_param(1 if params["mdp_params"]["rew_shaping_params"] != 0 else 0)

        configure_other_agent(params, gym_env, mlp, mdp)

        # Create model
        with tf.device('/device:GPU:{}'.format(params["GPU_ID"])):
            model = create_model(gym_env, "ppo_agent", **params)

        # Train model
        # CURR_SEED is written into the caller's dict so that it reaches
        # update_model through the **params unpacking.
        params["CURR_SEED"] = seed
        train_info = update_model(gym_env, model, **params)
        
        # Save model
        save_ppo_model(model, curr_seed_dir + model.agent_name)
        print("Saved training info at", curr_seed_dir + "training_info")
        save_pickle(train_info, curr_seed_dir + "training_info")
        train_infos.append(train_info)
    
    return train_infos
예제 #4
0
def run_all_bc_experiments():
    """Train BC models for five layouts, evaluate them, and evaluate a hand-picked subset.

    Steps, in order:
      1. Train models for every layout/seed via ``train_bc_models``.
      2. Set the global seed to 64, evaluate all models for 100 rounds,
         pickle the result to ``BC_MODELS_EVALUATION_PATH`` and print it.
      3. Build the model names of the manually selected train/test models
         per layout, evaluate them, and pickle the result to
         ``BC_SAVE_DIR + "best_bc_models_performance"``.
    """
    # Train BC models
    seeds = [5415, 2652, 6440, 1965, 6647]

    # (layout name, number of epochs), listed in training order.
    layout_epochs = [
        ("simple", 100),
        ("random1", 120),
        ("unident_s", 120),
        ("random0", 90),
        ("random3", 110),
    ]
    all_params = [
        {"layout_name": layout, "num_epochs": epochs, "lr": 1e-3, "adam_eps": 1e-8}
        for layout, epochs in layout_epochs
    ]
    train_bc_models(all_params, seeds)

    # Evaluate BC models
    set_global_seed(64)

    num_rounds = 100
    evaluation = evaluate_all_bc_models(all_params, num_rounds, len(seeds))
    save_pickle(evaluation, BC_MODELS_EVALUATION_PATH)
    print("All BC models evaluation: ", evaluation)

    # Seed indices [train, test] picked by hand so that both models roughly
    # match in performance (the test BC model should be slightly better
    # than the train BC model).
    selected_models = {
        "simple": [0, 1],
        "unident_s": [0, 0],
        "random1": [4, 2],
        "random0": [2, 1],
        "random3": [3, 3]
    }

    final_bc_model_paths = {
        "train": {
            layout: "{}_bc_train_seed{}".format(layout, train_idx)
            for layout, (train_idx, _) in selected_models.items()
        },
        "test": {
            layout: "{}_bc_test_seed{}".format(layout, test_idx)
            for layout, (_, test_idx) in selected_models.items()
        },
    }

    best_performance = evaluate_bc_models(final_bc_model_paths, num_rounds)
    save_pickle(best_performance, BC_SAVE_DIR + "best_bc_models_performance")
예제 #5
0
def run_all_ppo_hm_experiments(best_bc_model_paths):
    """Plot the PPO_HM training curves and evaluate the PPO_HM models.

    After ``reset_tf()``, the training curves are plotted, the global seed is
    set to 124, and the models are evaluated for 50 rounds. The result is
    pickled to ``PPO_DATA_DIR + "ppo_hm_models_performance"``.

    Args:
        best_bc_model_paths: mapping whose 'test' entry is forwarded to
            ``evaluate_all_ppo_hm_models``.
    """
    reset_tf()

    run_seeds = [8355, 5748, 1352, 3325, 8611]

    # Layouts covered by this experiment (this list has no "random0" entry).
    layouts = ("simple", "unident_s", "random1", "random3")
    ppo_hm_model_paths = {layout: "ppo_hm_" + layout for layout in layouts}

    plot_ppo_hm_training_curves(ppo_hm_model_paths, run_seeds)

    set_global_seed(124)
    num_rounds = 50
    test_bc_paths = best_bc_model_paths['test']
    performance = evaluate_all_ppo_hm_models(
        ppo_hm_model_paths, test_bc_paths, num_rounds, run_seeds, best=True
    )
    save_pickle(performance, PPO_DATA_DIR + "ppo_hm_models_performance")
예제 #6
0
def run_all_ppo_bc_experiments(best_bc_model_paths):
    """Plot the PPO_BC training curves and evaluate the PPO_BC models.

    After ``reset_tf()``, the training curves are plotted (with ``save=True``),
    the global seed is set to 248, and the models are evaluated for 100
    rounds. The evaluation result is passed through
    ``prepare_nested_default_dict_for_pickle`` and pickled to
    ``PPO_DATA_DIR + "ppo_bc_models_performance"``.

    Args:
        best_bc_model_paths: forwarded unchanged to
            ``evaluate_all_ppo_bc_models``.
    """
    reset_tf()

    seeds = {
        "bc_train": [9456, 1887, 5578, 5987, 516],
        "bc_test": [2888, 7424, 7360, 4467, 184]
    }

    # Each model name is "ppo_<bc kind>_<layout>", e.g. "ppo_bc_train_simple".
    layouts = ("simple", "unident_s", "random1", "random0", "random3")
    ppo_bc_model_paths = {
        bc_kind: {layout: "ppo_{}_{}".format(bc_kind, layout) for layout in layouts}
        for bc_kind in ('bc_train', 'bc_test')
    }

    plot_runs_training_curves(ppo_bc_model_paths, seeds, save=True)

    set_global_seed(248)
    num_rounds = 100
    raw_performance = evaluate_all_ppo_bc_models(
        ppo_bc_model_paths, best_bc_model_paths, num_rounds, seeds, best=True
    )
    save_pickle(
        prepare_nested_default_dict_for_pickle(raw_performance),
        PPO_DATA_DIR + "ppo_bc_models_performance",
    )
    def setUp(self):
        """Build the BC params, model directory, dummy input and expected data.

        Sets the global seed to 0, loads BC params for the dummy human data
        with the "cramped_room" layout and a single training epoch, makes
        sure the "test_model" directory under ``BC_SAVE_DIR`` exists, and
        stores on ``self``:

        - ``dummy_input``: the first row of the stacked "ep_states" returned
          by ``get_trajs_from_data``;
        - ``initial_states``: two zero arrays of shape (1, cell_size);
        - ``expected``: the object unpickled from ``BC_EXPECTED_DATA_PATH``.
        """
        set_global_seed(0)
        self.bc_params = get_bc_params(data_path=DUMMY_2019_CLEAN_HUMAN_DATA_PATH)
        self.bc_params["mdp_params"]["layout_name"] = "cramped_room"
        self.bc_params["training_params"]["epochs"] = 1
        self.model_dir = os.path.join(BC_SAVE_DIR, "test_model")

        # exist_ok=True replaces the former exists()-then-makedirs() pair,
        # which could fail if the directory was created in between the check
        # and the creation (e.g. by tests running in parallel).
        os.makedirs(self.model_dir, exist_ok=True)

        processed_trajs, _ = get_trajs_from_data(
            **self.bc_params["data_params"], silent=True
        )
        self.dummy_input = np.vstack(processed_trajs["ep_states"])[:1, :]

        cell_size = self.bc_params['cell_size']
        self.initial_states = [np.zeros((1, cell_size)) for _ in range(2)]

        # NOTE: pickle.load can execute arbitrary code; BC_EXPECTED_DATA_PATH
        # must point at a trusted, repository-controlled fixture.
        with open(BC_EXPECTED_DATA_PATH, "rb") as f:
            self.expected = pickle.load(f)

        # Disable TF warnings and infos
        tf.get_logger().setLevel('ERROR')
예제 #8
0
def run_all_ppo_sp_experiments(best_bc_model_paths):
    """Plot the PPO self-play training curves and evaluate the PPO_SP models.

    After ``reset_tf()``, the training curves are plotted (with ``save=True``),
    the global seed is set to 124, and the models are evaluated for 100
    rounds. The result is pickled to
    ``PPO_DATA_DIR + "ppo_sp_models_performance"``.

    Args:
        best_bc_model_paths: mapping whose 'test' entry is forwarded to
            ``evaluate_all_sp_ppo_models``.
    """
    reset_tf()

    run_seeds = [2229, 7649, 7225, 9807, 386]

    layouts = ("simple", "unident_s", "random1", "random0", "random3")
    ppo_sp_model_paths = {layout: "ppo_sp_{}".format(layout) for layout in layouts}

    plot_ppo_sp_training_curves(ppo_sp_model_paths, run_seeds, save=True)

    set_global_seed(124)
    num_rounds = 100
    test_bc_paths = best_bc_model_paths['test']
    performance = evaluate_all_sp_ppo_models(
        ppo_sp_model_paths, test_bc_paths, num_rounds, run_seeds, best=True
    )
    save_pickle(performance, PPO_DATA_DIR + "ppo_sp_models_performance")
def run_all_pbt_experiments(best_bc_model_paths):
    """Plot the PBT runs and evaluate the PBT models.

    The runs are plotted (with ``save=True``), the global seed is set to 512,
    and the models are evaluated for 100 rounds with ``best=False``. The
    result is pickled to ``PBT_DATA_DIR + "pbt_performance"``.

    Args:
        best_bc_model_paths: forwarded unchanged to
            ``evaluate_all_pbt_models``.
    """
    run_seeds = [8015, 3554, 581, 5608, 4221]

    layouts = ("simple", "unident_s", "random1", "random3", "random0")
    pbt_model_paths = {layout: "pbt_" + layout for layout in layouts}

    # Plotting
    plot_pbt_runs(pbt_model_paths, run_seeds, save=True)

    # Evaluating
    set_global_seed(512)
    num_rounds = 100
    performance = evaluate_all_pbt_models(
        pbt_model_paths, best_bc_model_paths, num_rounds, run_seeds, best=False
    )
    save_pickle(performance, PBT_DATA_DIR + "pbt_performance")
 def setUp(self):
     """Fixture hook: call reset_tf() and then set the global seed to 0.

     Presumably a unittest.TestCase.setUp override run before each test -
     the enclosing class is not visible here, so confirm.
     """
     reset_tf()
     set_global_seed(0)
예제 #11
0
def run_all_bc_experiments():
    """Train BC models for five layouts, evaluate them, and evaluate a hand-picked subset.

    Steps, in order:
      1. Train models for every layout/seed via ``train_bc_models``.
      2. Set the global seed to 64, evaluate all models for 100 rounds,
         pickle the result to ``BC_MODELS_EVALUATION_PATH`` and print it.
      3. Build the model names of the manually selected train/test models
         per layout, evaluate them, and pickle the result to
         ``BC_SAVE_DIR + "best_bc_models_performance"``.
    """

    def make_params(layout_name, num_epochs):
        # All layouts share the same learning rate and Adam epsilon.
        return {
            "layout_name": layout_name,
            "num_epochs": num_epochs,
            "lr": 1e-3,
            "adam_eps": 1e-8
        }

    # Train BC models
    seeds = [5415, 2652, 6440, 1965, 6647]
    all_params = [
        make_params("simple", 100),
        make_params("random1", 120),
        make_params("unident_s", 120),
        make_params("random0", 90),
        make_params("random3", 110),
    ]
    train_bc_models(all_params, seeds)

    # Evaluate BC models
    set_global_seed(64)

    num_rounds = 100
    evaluation = evaluate_all_bc_models(all_params, num_rounds, len(seeds))
    save_pickle(evaluation, BC_MODELS_EVALUATION_PATH)
    print("All BC models evaluation: ", evaluation)

    # These models have been manually selected to more or less match in performance,
    # (test BC model should be a bit better than the train BC model)
    selected_models = {
        "simple": [0, 1],
        "unident_s": [0, 0],
        "random1": [4, 2],
        "random0": [2, 1],
        "random3": [3, 3]
    }

    name_templates = {
        "train": "{}_bc_train_seed{}",
        "test": "{}_bc_test_seed{}"
    }
    final_bc_model_paths = {"train": {}, "test": {}}
    for layout, (train_idx, test_idx) in selected_models.items():
        for split, seed_idx in (("train", train_idx), ("test", test_idx)):
            final_bc_model_paths[split][layout] = name_templates[split].format(
                layout, seed_idx)

    best_performance = evaluate_bc_models(final_bc_model_paths, num_rounds)
    save_pickle(best_performance, BC_SAVE_DIR + "best_bc_models_performance")


# Automatic selection of the best BC models. This caused imbalances that made the results harder to interpret,
# so it is better to manually select non-best models.

# def select_bc_models(bc_models_evaluation, num_rounds, num_seeds):
#     best_bc_model_paths = { "train": {}, "test": {} }

#     for layout_name, layout_eval_dict in bc_models_evaluation.items():
#         for model_type, seed_eval_dict in layout_eval_dict.items():
#             best_seed = np.argmax([seed_eval_dict[i] for i in range(num_seeds)])
#             best_bc_model_paths[model_type][layout_name] = "{}_bc_{}_seed{}".format(layout_name, model_type, best_seed)

#     save_pickle(best_bc_model_paths, BEST_BC_MODELS_PATH)
#     return best_bc_model_paths