Example #1
0
def evaluate_ppo_hm_and_bc(layout,
                           ppo_hm_path,
                           bc_test_path,
                           num_rounds,
                           seeds,
                           best=False,
                           display=False):
    """Evaluate a PPO_HM model paired with a BC test agent on one layout.

    For every seed, the PPO agent plays with the fixed BC test agent in
    both player orderings; the mean episode return of each ordering is
    appended under its own key ("PPO_HM+BC_test_0" / "PPO_HM+BC_test_1").

    Returns a defaultdict mapping layout -> key -> list of mean returns
    (one entry per seed).
    """
    ppo_hm_performance = defaultdict(lambda: defaultdict(list))

    agent_bc_test, bc_params = get_bc_agent_from_saved(bc_test_path)
    # Drop BC-specific config entries that AgentEvaluator does not accept.
    del bc_params["data_params"]
    del bc_params["mdp_fn_params"]
    evaluator = AgentEvaluator(**bc_params)

    for seed in seeds:
        agent_ppo, _ = get_ppo_agent(ppo_hm_path, seed, best=best)

        # Evaluate PPO as player 0, then as player 1.
        pairings = (("PPO_HM+BC_test_0", AgentPair(agent_ppo, agent_bc_test)),
                    ("PPO_HM+BC_test_1", AgentPair(agent_bc_test, agent_ppo)))
        for key, pair in pairings:
            rollout = evaluator.evaluate_agent_pair(pair,
                                                    num_games=num_rounds,
                                                    display=display)
            mean_return = np.mean(rollout['ep_returns'])
            ppo_hm_performance[layout][key].append(mean_return)

    return ppo_hm_performance
def evaluate_layout_loss_for_ppo_models(ppo_path, layout_name, trajs, eps,
                                        seeds):
    """Compute per-seed trajectory losses and accuracies for saved PPO models.

    For each seed, loads the best PPO checkpoint from `ppo_path`, points it
    at the layout's gridworld, and scores it on `trajs`. Results are keyed
    "{layout_name}_seed{seed}" with 'losses' and 'accuracies' entries.
    """
    layout_losses = defaultdict(dict)
    for seed in seeds:
        # Fresh TF graph/session before loading the next checkpoint.
        reset_tf()
        agent_ppo, bc_params = get_ppo_agent(ppo_path, seed, best=True)
        agent_ppo.action_probs = True
        mdp = OvercookedGridworld.from_layout_name(**bc_params["mdp_params"])
        agent_ppo.set_mdp(mdp)

        losses, accuracies = get_trajs_losses_for_model(trajs, agent_ppo, eps)
        entry = layout_losses["{}_seed{}".format(layout_name, seed)]
        entry['losses'] = losses
        entry['accuracies'] = accuracies
    return layout_losses
Example #3
0
def setup_game(run_type, run_dir, run_seed, agent_num, player_idx):
    """Load a trained agent and build the Overcooked env it was trained on.

    Supported run types: "ppo" (load best checkpoint for `run_seed`),
    "pbt" (load agent `agent_num`'s best checkpoint plus its config file),
    and "bc" (load a saved behavior-cloning agent).

    Returns (env, agent, player_idx); raises ValueError on an unknown type.
    """
    if run_type not in ("ppo", "pbt", "bc"):
        raise ValueError("Unrecognized run type")

    if run_type == "ppo":
        print("Seed", run_seed)
        agent, config = get_ppo_agent(run_dir, run_seed, best=True)
    elif run_type == "pbt":
        run_path = "data/" + run_type + "_runs/" + run_dir + "/seed_{}".format(
            run_seed)
        config = load_dict_from_file(run_path + "/config.txt")
        agent_path = run_path + '/agent' + str(agent_num) + "/best"
        agent = get_agent_from_saved_model(agent_path, config["sim_threads"])
    else:  # "bc"
        agent, config = get_bc_agent_from_saved(run_dir)

    mdp = OvercookedGridworld.from_layout_name(**config["mdp_params"])
    env = OvercookedEnv(mdp, **config["env_params"])
    return env, agent, player_idx
Example #4
0
def evaluate_ppo_and_bc_models_for_layout(layout,
                                          num_rounds,
                                          bc_model_paths,
                                          ppo_bc_model_paths,
                                          seeds,
                                          best=False,
                                          display=False):
    """Evaluate PPO_BC_train and PPO_BC_test agents with a BC test agent.

    For each seed index, two PPO variants (one trained against BC_train,
    one trained against BC_test — the "known true model" upper bound) are
    each evaluated with the fixed BC test agent in both player positions,
    giving four pairings per seed. Mean episode returns are appended under
    keys "PPO_BC_{train,test}+BC_test_{0,1}", where the suffix is the PPO
    agent's player index.

    Args:
        layout: layout name; keys the path dicts and the result dict.
        num_rounds: number of games per pairing.
        bc_model_paths: dict with a 'test' entry mapping layout -> BC path.
        ppo_bc_model_paths: dict with 'bc_train' / 'bc_test' entries
            mapping layout -> PPO run directory.
        seeds: dict with equal-length 'bc_train' and 'bc_test' seed lists.
        best: if True, load the best checkpoint of each PPO run.
        display: if True, render the games.

    Returns:
        defaultdict mapping layout -> key -> list of mean returns, one
        entry per seed.
    """
    assert len(seeds["bc_train"]) == len(seeds["bc_test"])
    ppo_bc_performance = defaultdict(lambda: defaultdict(list))

    agent_bc_test, bc_params = get_bc_agent_from_saved(
        bc_model_paths['test'][layout])
    ppo_bc_train_path = ppo_bc_model_paths['bc_train'][layout]
    ppo_bc_test_path = ppo_bc_model_paths['bc_test'][layout]
    evaluator = AgentEvaluator(mdp_params=bc_params["mdp_params"],
                               env_params=bc_params["env_params"])

    def eval_both_positions(agent_ppo, key_prefix):
        # Run the PPO agent as player 0 then player 1 with the BC test
        # agent, recording the mean episode return per position.
        pairings = ((key_prefix + "_0", AgentPair(agent_ppo, agent_bc_test)),
                    (key_prefix + "_1", AgentPair(agent_bc_test, agent_ppo)))
        for key, pair in pairings:
            rollout = evaluator.evaluate_agent_pair(pair,
                                                    num_games=num_rounds,
                                                    display=display)
            ppo_bc_performance[layout][key].append(
                np.mean(rollout['ep_returns']))

    for seed_idx in range(len(seeds["bc_train"])):
        # How well does PPO trained vs. BC_train generalize to BC_test?
        agent_ppo_bc_train, ppo_config = get_ppo_agent(
            ppo_bc_train_path, seeds["bc_train"][seed_idx], best=best)
        assert common_keys_equal(bc_params["mdp_params"],
                                 ppo_config["mdp_params"])
        eval_both_positions(agent_ppo_bc_train, "PPO_BC_train+BC_test")

        # How well could we do if we knew the true model BC_test?
        agent_ppo_bc_test, ppo_config = get_ppo_agent(
            ppo_bc_test_path, seeds["bc_test"][seed_idx], best=best)
        assert common_keys_equal(bc_params["mdp_params"],
                                 ppo_config["mdp_params"])
        eval_both_positions(agent_ppo_bc_test, "PPO_BC_test+BC_test")

    return ppo_bc_performance