Example #1
def evaluate_bc_models(bc_model_paths, num_rounds):
    """
    Evaluate the passed-in BC models over `num_rounds` rounds of play.
    """
    best_bc_models_performance = {}

    # Evaluate the best BC models for each layout: paired with themselves, and train vs. test
    for layout_name in bc_model_paths['train'].keys():
        print(layout_name)
        best_bc_models_performance[layout_name] = {}
        
        eval_trajs = eval_with_benchmarking_from_saved(num_rounds, bc_model_paths['train'][layout_name])
        best_bc_models_performance[layout_name]["BC_train+BC_train"] = mean_and_std_err(eval_trajs['ep_returns'])
        
        eval_trajs = eval_with_benchmarking_from_saved(num_rounds, bc_model_paths['test'][layout_name])
        best_bc_models_performance[layout_name]["BC_test+BC_test"] = mean_and_std_err(eval_trajs['ep_returns'])

        bc_train, bc_params_train = get_bc_agent_from_saved(bc_model_paths['train'][layout_name])
        bc_test, bc_params_test = get_bc_agent_from_saved(bc_model_paths['test'][layout_name])
        del bc_params_train["data_params"]
        del bc_params_test["data_params"]
        assert common_keys_equal(bc_params_train, bc_params_test)
        ae = AgentEvaluator(mdp_params=bc_params_train["mdp_params"], env_params=bc_params_train["env_params"])
        
        train_and_test = ae.evaluate_agent_pair(AgentPair(bc_train, bc_test), num_games=num_rounds)
        best_bc_models_performance[layout_name]["BC_train+BC_test_0"] = mean_and_std_err(train_and_test['ep_returns'])

        test_and_train = ae.evaluate_agent_pair(AgentPair(bc_test, bc_train), num_games=num_rounds)
        best_bc_models_performance[layout_name]["BC_train+BC_test_1"] = mean_and_std_err(test_and_train['ep_returns'])
    
    return best_bc_models_performance
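
A minimal usage sketch for the function above. The layout and model names are hypothetical placeholders for real saved-model directories, and the function is assumed to be in scope:

# Hypothetical registry of saved BC models, keyed the way the function expects.
bc_model_paths = {
    "train": {"simple": "bc_train_simple_seed0"},
    "test": {"simple": "bc_test_simple_seed0"},
}
performance = evaluate_bc_models(bc_model_paths, num_rounds=5)
# Each entry is a (mean, standard_error) tuple over episode returns, e.g.:
print(performance["simple"]["BC_train+BC_test_0"])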
Example #2
def evaluate_pbt_for_layout(layout_name,
                            num_rounds,
                            pbt_performance,
                            pbt_model_paths,
                            best_test_bc_models,
                            seeds,
                            best=False):
    bc_agent, bc_params = get_bc_agent_from_saved(
        model_name=best_test_bc_models[layout_name])
    ae = AgentEvaluator(mdp_params=bc_params["mdp_params"],
                        env_params=bc_params["env_params"])

    pbt_save_dir = PBT_DATA_DIR + pbt_model_paths[layout_name] + "/"
    pbt_config = load_dict_from_txt(pbt_save_dir + "config")
    assert common_keys_equal(
        bc_params["mdp_params"], pbt_config["mdp_params"]
    ), "MDP params differed between PBT and BC model training"
    assert common_keys_equal(
        bc_params["env_params"], pbt_config["env_params"]
    ), "Env params differed between PBT and BC model training"

    pbt_agents = [
        get_pbt_agent_from_config(pbt_save_dir,
                                  pbt_config["sim_threads"],
                                  seed=seed,
                                  agent_idx=0,
                                  best=best) for seed in seeds
    ]
    eval_pbt_over_seeds(pbt_agents, bc_agent, layout_name, num_rounds,
                        pbt_performance, ae)
    return pbt_performance
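
A sketch of a driver loop for this function; the run directories, model names, and seeds below are hypothetical:

from collections import defaultdict

pbt_model_paths = {"simple": "pbt_simple"}                # hypothetical: layout -> PBT run dir
best_test_bc_models = {"simple": "bc_test_simple_seed0"}  # hypothetical: layout -> BC model name

pbt_performance = defaultdict(lambda: defaultdict(list))
for layout in pbt_model_paths:
    pbt_performance = evaluate_pbt_for_layout(
        layout, num_rounds=10, pbt_performance=pbt_performance,
        pbt_model_paths=pbt_model_paths,
        best_test_bc_models=best_test_bc_models,
        seeds=[0, 10, 20],  # hypothetical seeds
        best=True)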
Example #3
def evaluate_ppo_hm_and_bc(layout,
                           ppo_hm_path,
                           bc_test_path,
                           num_rounds,
                           seeds,
                           best=False,
                           display=False):
    ppo_hm_performance = defaultdict(lambda: defaultdict(list))

    agent_bc_test, bc_params = get_bc_agent_from_saved(bc_test_path)
    del bc_params["data_params"]
    del bc_params["mdp_fn_params"]
    evaluator = AgentEvaluator(**bc_params)

    for seed in seeds:
        agent_ppo, _ = get_ppo_agent(ppo_hm_path, seed, best=best)

        ppo_and_bc = evaluator.evaluate_agent_pair(AgentPair(
            agent_ppo, agent_bc_test),
                                                   num_games=num_rounds,
                                                   display=display)
        avg_ppo_and_bc = np.mean(ppo_and_bc['ep_returns'])
        ppo_hm_performance[layout]["PPO_HM+BC_test_0"].append(avg_ppo_and_bc)

        bc_and_ppo = evaluator.evaluate_agent_pair(AgentPair(
            agent_bc_test, agent_ppo),
                                                   num_games=num_rounds,
                                                   display=display)
        avg_bc_and_ppo = np.mean(bc_and_ppo['ep_returns'])
        ppo_hm_performance[layout]["PPO_HM+BC_test_1"].append(avg_bc_and_ppo)

    return ppo_hm_performance
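
A sketch of one call plus a summary of its per-seed averages, reusing the mean_and_std_err helper seen in Example #1; the paths and seeds below are hypothetical:

results = evaluate_ppo_hm_and_bc(
    layout="simple",
    ppo_hm_path="ppo_hm_simple",          # hypothetical PPO_HM run directory
    bc_test_path="bc_test_simple_seed0",  # hypothetical BC model name
    num_rounds=10, seeds=[0, 10], best=True)
for pairing, per_seed_avgs in results["simple"].items():
    print(pairing, mean_and_std_err(per_seed_avgs))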
Example #4
    def test_running_ppo_bc_train(self):
        # Check model exists and has right params
        layout_name = 'simple'
        best_bc_model_paths = load_pickle(BEST_BC_MODELS_PATH)
        bc_model_path = best_bc_model_paths["train"][layout_name]

        print("LOADING BC MODEL FROM: {}".format(bc_model_path))
        _, bc_params = get_bc_agent_from_saved(bc_model_path)

        expected_bc_params = {
            'data_params': {
                'train_mdps': ['simple'],
                'ordered_trajs': True,
                'human_ai_trajs': False,
                'data_path': 'data/human/clean_train_trials.pkl'
            },
            'mdp_params': {'layout_name': 'simple', 'start_order_list': None},
            'env_params': {'horizon': 400},
            'mdp_fn_params': {}
        }
        self.assertDictEqual(expected_bc_params, bc_params)

        # Run twice with the same seed and compare the output dicts. We can't
        # hardcode the expected values as above because of the additional
        # dependency on the human model.

        reset_tf()
        run = ex_ppo.run(config_updates={'LOCAL_TESTING': True, 'layout_name': layout_name, 'OTHER_AGENT_TYPE': 'bc_train', 'SEEDS': [10]})
        train_info0 = run.result[0]

        reset_tf()
        run = ex_ppo.run(config_updates={'LOCAL_TESTING': True, 'layout_name': layout_name, 'OTHER_AGENT_TYPE': 'bc_train', 'SEEDS': [10]})
        train_info1 = run.result[0]

        self.assertDictEqual(train_info0, train_info1)

        # Uncomment to save the current output as the standard to check against
        # save_pickle(train_info1, 'data/testing/ppo_bc_train_info')

        expected_dict = load_pickle('data/testing/ppo_bc_train_info')
        for k, v in train_info1.items():
            for found_item, expected_item in zip(v, expected_dict[k]):
                self.assertAlmostEqual(found_item, expected_item, places=5)
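
The run-twice check above is a general pattern for guarding against nondeterminism. A minimal, framework-independent sketch of the same idea (all names here are hypothetical):

def assert_deterministic(run_fn, seed, places=5):
    # Run the same seeded experiment twice and compare the scalar logs.
    info0, info1 = run_fn(seed), run_fn(seed)
    assert info0.keys() == info1.keys()
    for k in info0:
        for a, b in zip(info0[k], info1[k]):
            assert abs(a - b) < 10 ** -places, "{} diverged: {} vs {}".format(k, a, b)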
Example #5
def evaluate_layout_loss_for_bc_models(best_bc_model_paths, layout_name, trajs,
                                       eps):
    # TODO Check this isn't stochastic
    layout_losses = defaultdict(dict)
    model_name = best_bc_model_paths["train"][layout_name]
    bc_train, _ = get_bc_agent_from_saved(model_name=model_name)

    model_name = best_bc_model_paths["test"][layout_name]
    bc_test, _ = get_bc_agent_from_saved(model_name=model_name)

    bc_agents = {"train": bc_train, "test": bc_test}
    for agent_type, bc_agent in bc_agents.items():
        bc_agent.action_probs = True
        bc_agent.stochastic = False
        bc_agent.will_unblock_if_stuck = False

        losses, accuracies = get_trajs_losses_for_model(trajs, bc_agent, eps)
        layout_losses[agent_type]['losses'] = losses
        layout_losses[agent_type]['accuracies'] = accuracies
    return layout_losses
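
A sketch of a call over saved human trajectories. Here best_bc_model_paths is the registry from Example #1, while human_trajs and the eps value are hypothetical; eps's meaning is defined by get_trajs_losses_for_model:

layout = "simple"
layout_losses = evaluate_layout_loss_for_bc_models(
    best_bc_model_paths,        # hypothetical registry, as in Example #1
    layout,
    trajs=human_trajs[layout],  # hypothetical: layout -> list of trajectories
    eps=0.05)                   # hypothetical value
print(layout_losses["train"]["accuracies"], layout_losses["test"]["accuracies"])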
Example #6
def setup_game(run_type, run_dir, run_seed, agent_num, player_idx):
    if run_type == "ppo":
        print("Seed", run_seed)
        agent, config = get_ppo_agent(run_dir, run_seed, best=True)
    elif run_type == "pbt":
        run_path = "data/" + run_type + "_runs/" + run_dir + "/seed_{}".format(
            run_seed)
        config = load_dict_from_file(run_path + "/config.txt")

        agent_path = run_path + '/agent' + str(agent_num) + "/best"
        agent = get_agent_from_saved_model(agent_path, config["sim_threads"])
    elif run_type == "bc":
        agent, config = get_bc_agent_from_saved(run_dir)
    else:
        raise ValueError("Unrecognized run type")

    env = OvercookedEnv(
        OvercookedGridworld.from_layout_name(**config["mdp_params"]),
        **config["env_params"])
    return env, agent, player_idx
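
A sketch of the expected call for a PPO run; the run directory is hypothetical, and set_agent_index is assumed to be how overcooked_ai agents are told which player slot they control:

env, agent, player_idx = setup_game(
    run_type="ppo",
    run_dir="ppo_bc_train_simple",  # hypothetical run directory
    run_seed=0, agent_num=0, player_idx=0)
agent.set_agent_index(player_idx)  # assumption: agents expose set_agent_index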
Example #7
File: ppo.py Project: 51616/human_aware_rl
def configure_other_agent(params, gym_env, mlp, mdp):
    if params["OTHER_AGENT_TYPE"] == "hm":
        hl_br, hl_temp, ll_br, ll_temp = params["HM_PARAMS"]
        agent = GreedyHumanModel(mlp, hl_boltzmann_rational=hl_br, hl_temp=hl_temp, ll_boltzmann_rational=ll_br, ll_temp=ll_temp)
        gym_env.use_action_method = True

    elif params["OTHER_AGENT_TYPE"][:2] == "bc":
        best_bc_model_paths = load_pickle(BEST_BC_MODELS_PATH)
        if params["OTHER_AGENT_TYPE"] == "bc_train":
            bc_model_path = best_bc_model_paths["train"][mdp.layout_name]
        elif params["OTHER_AGENT_TYPE"] == "bc_test":
            bc_model_path = best_bc_model_paths["test"][mdp.layout_name]
        else:
            raise ValueError("Other agent type must be bc train or bc test")

        print("LOADING BC MODEL FROM: {}".format(bc_model_path))
        agent, bc_params = get_bc_agent_from_saved(bc_model_path)
        gym_env.use_action_method = True
        # Make sure environment params are the same in PPO as in the BC model
        for k, v in bc_params["env_params"].items():
            assert v == params["env_params"][k], \
                "{} did not match. BC env_params: {} \t PPO params: {}".format(k, v, params["env_params"][k])
        for k, v in bc_params["mdp_params"].items():
            assert v == params["mdp_params"][k], \
                "{} did not match. BC mdp_params: {} \t PPO params: {}".format(k, v, params["mdp_params"][k])

    elif params["OTHER_AGENT_TYPE"] == "rnd":
        agent = RandomAgent()

    elif params["OTHER_AGENT_TYPE"] == "sp":
        gym_env.self_play_randomization = 1
    elif params["OTHER_AGENT_TYPE"] == "sampling_sp":
        agent = RandomAgent() # just a placeholder, will be replaced in the training loop
        gym_env.self_play_randomization = 0.0 # 0.0 = never paired with a copy of itself; partners come from sampling_sp
        # gym_env.use_action_method = True

    else:
        raise ValueError("unknown type of agent to match with")
        
    if params["OTHER_AGENT_TYPE"] != "sp":
        assert mlp.mdp == mdp
        agent.set_mdp(mdp)
        gym_env.other_agent = agent
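
A sketch of the slice of params this dispatch reads; the concrete values are hypothetical, and the mdp/env params mirror those asserted against the BC model above:

params = {
    "OTHER_AGENT_TYPE": "bc_train",  # one of: "hm", "bc_train", "bc_test", "rnd", "sp", "sampling_sp"
    "HM_PARAMS": [True, 1.0, False, 1.0],  # hypothetical; only read for "hm"
    "mdp_params": {"layout_name": "simple", "start_order_list": None},
    "env_params": {"horizon": 400},
}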
Example #8
def evaluate_ppo_and_bc_models_for_layout(layout,
                                          num_rounds,
                                          bc_model_paths,
                                          ppo_bc_model_paths,
                                          seeds,
                                          best=False,
                                          display=False):
    assert len(seeds["bc_train"]) == len(seeds["bc_test"])
    ppo_bc_performance = defaultdict(lambda: defaultdict(list))

    agent_bc_test, bc_params = get_bc_agent_from_saved(
        bc_model_paths['test'][layout])
    ppo_bc_train_path = ppo_bc_model_paths['bc_train'][layout]
    ppo_bc_test_path = ppo_bc_model_paths['bc_test'][layout]
    evaluator = AgentEvaluator(mdp_params=bc_params["mdp_params"],
                               env_params=bc_params["env_params"])

    for seed_idx in range(len(seeds["bc_train"])):
        agent_ppo_bc_train, ppo_config = get_ppo_agent(
            ppo_bc_train_path, seeds["bc_train"][seed_idx], best=best)
        assert common_keys_equal(bc_params["mdp_params"],
                                 ppo_config["mdp_params"])

        # Out of curiosity: how well does the agent do with itself?
        # ppo_and_ppo = evaluator.evaluate_agent_pair(AgentPair(agent_ppo_bc_train, agent_ppo_bc_train), num_games=max(int(num_rounds/2), 1), display=display)
        # avg_ppo_and_ppo = np.mean(ppo_and_ppo['ep_returns'])
        # ppo_bc_performance[layout]["PPO_BC_train+PPO_BC_train"].append(avg_ppo_and_ppo)

        # How well does it generalize to a new agent in simulation?
        ppo_and_bc = evaluator.evaluate_agent_pair(AgentPair(
            agent_ppo_bc_train, agent_bc_test),
                                                   num_games=num_rounds,
                                                   display=display)
        avg_ppo_and_bc = np.mean(ppo_and_bc['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_train+BC_test_0"].append(
            avg_ppo_and_bc)

        bc_and_ppo = evaluator.evaluate_agent_pair(AgentPair(
            agent_bc_test, agent_ppo_bc_train),
                                                   num_games=num_rounds,
                                                   display=display)
        avg_bc_and_ppo = np.mean(bc_and_ppo['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_train+BC_test_1"].append(
            avg_bc_and_ppo)

        # How well could we do if we knew the true model, BC_test?
        agent_ppo_bc_test, ppo_config = get_ppo_agent(
            ppo_bc_test_path, seeds["bc_test"][seed_idx], best=best)
        assert common_keys_equal(bc_params["mdp_params"],
                                 ppo_config["mdp_params"])

        ppo_and_bc = evaluator.evaluate_agent_pair(AgentPair(
            agent_ppo_bc_test, agent_bc_test),
                                                   num_games=num_rounds,
                                                   display=display)
        avg_ppo_and_bc = np.mean(ppo_and_bc['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_test+BC_test_0"].append(
            avg_ppo_and_bc)

        bc_and_ppo = evaluator.evaluate_agent_pair(AgentPair(
            agent_bc_test, agent_ppo_bc_test),
                                                   num_games=num_rounds,
                                                   display=display)
        avg_bc_and_ppo = np.mean(bc_and_ppo['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_test+BC_test_1"].append(
            avg_bc_and_ppo)

    return ppo_bc_performance
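
A sketch of a loop collecting these results across layouts; the layouts, registries, and seeds below are hypothetical:

ppo_bc_results = {}
for layout in ["simple", "random1"]:  # hypothetical layouts
    res = evaluate_ppo_and_bc_models_for_layout(
        layout, num_rounds=10,
        bc_model_paths=bc_model_paths,          # hypothetical, as in Example #1
        ppo_bc_model_paths=ppo_bc_model_paths,  # hypothetical: {"bc_train": {...}, "bc_test": {...}}
        seeds={"bc_train": [0, 10], "bc_test": [0, 10]},
        best=True)
    ppo_bc_results.update(res)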
Example #9
def P_BC_evaluation_for_layout(ae, layout, best_bc_models):

    delivery_horizon = get_delivery_horizon(layout)
    print("Delivery horizon for layout {}: {}".format(layout,
                                                      delivery_horizon))

    layout_p_bc_eval = {}

    #######################
    # P_BC_test + BC_test #
    #######################

    # Prepare BC_test
    test_model_name = best_bc_models["test"][layout]
    agent_bc_test, _ = get_bc_agent_from_saved(test_model_name)
    agent_bc_test.stochastic = False

    # Prepare P_BC_test (making another copy of BC_test just to be embedded in P_BC)
    agent_bc_test_embedded, _ = get_bc_agent_from_saved(test_model_name)
    agent_bc_test_embedded.stochastic = False
    p_bc_test = EmbeddedPlanningAgent(agent_bc_test_embedded,
                                      agent_bc_test_embedded.mlp,
                                      delivery_horizon)
    p_bc_test.env = ae.env
    p_bc_test.debug = True

    # Execute runs
    ap_training = AgentPair(p_bc_test, agent_bc_test)
    data0 = ae.evaluate_agent_pair(ap_training, num_games=1, display=True)
    layout_p_bc_eval['P_BC_test+BC_test_0'] = data0['ep_returns'][0]

    ap_training = AgentPair(agent_bc_test, p_bc_test)
    data1 = ae.evaluate_agent_pair(ap_training, num_games=1, display=True)
    layout_p_bc_eval['P_BC_test+BC_test_1'] = data1['ep_returns'][0]
    print("P_BC_test + BC_test", data0['ep_returns'][0],
          data1['ep_returns'][0])

    ########################
    # P_BC_train + BC_test #
    ########################

    # Prepare P_BC_train
    train_model_name = best_bc_models["train"][layout]
    agent_bc_train_embedded, _ = get_bc_agent_from_saved(train_model_name)
    agent_bc_train_embedded.stochastic = False
    p_bc_train = EmbeddedPlanningAgent(agent_bc_train_embedded,
                                       agent_bc_train_embedded.mlp,
                                       delivery_horizon)
    p_bc_train.env = ae.env
    p_bc_train.debug = True

    # Execute runs
    ap_testing = AgentPair(p_bc_train, agent_bc_test)
    data0 = ae.evaluate_agent_pair(ap_testing, num_games=1, display=True)
    layout_p_bc_eval['P_BC_train+BC_test_0'] = data0['ep_returns'][0]

    ap_testing = AgentPair(agent_bc_test, p_bc_train)
    data1 = ae.evaluate_agent_pair(ap_testing, num_games=1, display=True)
    layout_p_bc_eval['P_BC_train+BC_test_1'] = data1['ep_returns'][0]
    print("P_BC_train + BC_test", data0['ep_returns'][0],
          data1['ep_returns'][0])

    return layout_p_bc_eval
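
A sketch of one call, constructing the AgentEvaluator the same way as the earlier examples; the mdp/env params and the model registry are hypothetical:

ae = AgentEvaluator(mdp_params={"layout_name": "simple", "start_order_list": None},
                    env_params={"horizon": 400})
best_bc_models = {"train": {"simple": "bc_train_simple_seed0"},  # hypothetical
                  "test": {"simple": "bc_test_simple_seed0"}}
p_bc_eval = P_BC_evaluation_for_layout(ae, "simple", best_bc_models)
print(p_bc_eval)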