Example 1
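The snippets below rely on a few standard imports, sketched here; the project-specific helpers they call (get_bc_agent_from_saved, get_ppo_agent, get_bc_agent_from_model, eval_with_benchmarking_from_saved, AgentEvaluator, AgentPair, RandomAgent, GreedyHumanModel, EmbeddedPlanningAgent, mean_and_std_err, common_keys_equal) are assumed to be importable from the surrounding codebase.

import copy
import unittest
from collections import defaultdict

import numpy as np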
def evaluate_ppo_hm_and_bc(layout,
                           ppo_hm_path,
                           bc_test_path,
                           num_rounds,
                           seeds,
                           best=False,
                           display=False):
    ppo_hm_performance = defaultdict(lambda: defaultdict(list))

    agent_bc_test, bc_params = get_bc_agent_from_saved(bc_test_path)
    del bc_params["data_params"]
    del bc_params["mdp_fn_params"]
    evaluator = AgentEvaluator(**bc_params)

    for seed in seeds:
        agent_ppo, _ = get_ppo_agent(ppo_hm_path, seed, best=best)

        # PPO agent in player position 0, BC agent in position 1
        ppo_and_bc = evaluator.evaluate_agent_pair(
            AgentPair(agent_ppo, agent_bc_test),
            num_games=num_rounds,
            display=display)
        avg_ppo_and_bc = np.mean(ppo_and_bc['ep_returns'])
        ppo_hm_performance[layout]["PPO_HM+BC_test_0"].append(avg_ppo_and_bc)

        # Same pairing with the player positions swapped
        bc_and_ppo = evaluator.evaluate_agent_pair(
            AgentPair(agent_bc_test, agent_ppo),
            num_games=num_rounds,
            display=display)
        avg_bc_and_ppo = np.mean(bc_and_ppo['ep_returns'])
        ppo_hm_performance[layout]["PPO_HM+BC_test_1"].append(avg_bc_and_ppo)

    return ppo_hm_performance
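A minimal usage sketch for evaluate_ppo_hm_and_bc; the layout name, paths, and seed values are hypothetical placeholders, not paths from the original experiments.

# Hypothetical call (layout, paths, and seeds are placeholders)
performance = evaluate_ppo_hm_and_bc(layout="simple",
                                     ppo_hm_path="path/to/ppo_hm_runs",
                                     bc_test_path="path/to/bc_test_model",
                                     num_rounds=20,
                                     seeds=[0, 10, 20],
                                     best=True)
# One average episode return per seed, for each ordering of the pair
print(performance["simple"]["PPO_HM+BC_test_0"])
print(performance["simple"]["PPO_HM+BC_test_1"])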
Example 2
def evaluate_bc_models(bc_model_paths, num_rounds):
    """
    Evaluate BC models passed in over `num_rounds` rounds
    """
    best_bc_models_performance = {}

    # Evaluate the best BC models for each layout
    for layout_name in bc_model_paths['train'].keys():
        print(layout_name)
        best_bc_models_performance[layout_name] = {}
        
        eval_trajs = eval_with_benchmarking_from_saved(num_rounds, bc_model_paths['train'][layout_name])
        best_bc_models_performance[layout_name]["BC_train+BC_train"] = mean_and_std_err(eval_trajs['ep_returns'])
        
        eval_trajs = eval_with_benchmarking_from_saved(num_rounds, bc_model_paths['test'][layout_name])
        best_bc_models_performance[layout_name]["BC_test+BC_test"] = mean_and_std_err(eval_trajs['ep_returns'])

        bc_train, bc_params_train = get_bc_agent_from_saved(bc_model_paths['train'][layout_name])
        bc_test, bc_params_test = get_bc_agent_from_saved(bc_model_paths['test'][layout_name])
        del bc_params_train["data_params"]
        del bc_params_test["data_params"]
        assert common_keys_equal(bc_params_train, bc_params_test)
        ae = AgentEvaluator(mdp_params=bc_params_train["mdp_params"], env_params=bc_params_train["env_params"])
        
        train_and_test = ae.evaluate_agent_pair(AgentPair(bc_train, bc_test), num_games=num_rounds)
        best_bc_models_performance[layout_name]["BC_train+BC_test_0"] = mean_and_std_err(train_and_test['ep_returns'])

        test_and_train = ae.evaluate_agent_pair(AgentPair(bc_test, bc_train), num_games=num_rounds)
        best_bc_models_performance[layout_name]["BC_train+BC_test_1"] = mean_and_std_err(test_and_train['ep_returns'])
    
    return best_bc_models_performance
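A minimal usage sketch for evaluate_bc_models; the layout name and model paths are hypothetical placeholders.

# Hypothetical call: 'train' and 'test' each map layout names to saved BC model paths
bc_model_paths = {
    "train": {"simple": "path/to/bc_train_simple"},
    "test": {"simple": "path/to/bc_test_simple"},
}
results = evaluate_bc_models(bc_model_paths, num_rounds=20)
# Each entry holds whatever mean_and_std_err returns for the episode returns
# (presumably a mean and standard error)
print(results["simple"]["BC_train+BC_test_0"])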
    # Test method in the same TestCase style as the class below; DISPLAY is assumed
    # to be a module-level flag in the test file.
    def test_embedded_planning_agent(self):
        agent_evaluator = AgentEvaluator({"layout_name": "cramped_room"},
                                         {"horizon": 100})
        other_agent = GreedyHumanModel(agent_evaluator.mlp)
        epa = EmbeddedPlanningAgent(other_agent,
                                    agent_evaluator.mlp,
                                    agent_evaluator.env,
                                    delivery_horizon=1)
        ap = AgentPair(epa, other_agent)
        agent_evaluator.evaluate_agent_pair(ap, num_games=1, display=DISPLAY)
class TestAgentEvaluator(unittest.TestCase):
    def setUp(self):
        self.agent_eval = AgentEvaluator({"layout_name": "cramped_room"},
                                         {"horizon": 100})

    def test_human_model_pair(self):
        trajs = self.agent_eval.evaluate_human_model_pair()
        try:
            AgentEvaluator.check_trajectories(trajs)
        except AssertionError as e:
            self.fail("Trajectories were not returned in standard format:\n{}".
                      format(e))

    def test_rollouts(self):
        ap = AgentPair(RandomAgent(), RandomAgent())
        trajs = self.agent_eval.evaluate_agent_pair(ap, num_games=5)
        try:
            AgentEvaluator.check_trajectories(trajs)
        except AssertionError as e:
            self.fail("Trajectories were not returned in standard format:\n{}".
                      format(e))

    def test_mlp_computation(self):
        try:
            self.agent_eval.mlp
        except Exception as e:
            self.fail("Failed to compute MediumLevelPlanner:\n{}".format(e))
def eval_with_benchmarking_from_model(n_games, model, bc_params, no_waits, display=False):
    bc_params = copy.deepcopy(bc_params)
    # Pair two copies of the BC agent built from the same model
    a0 = get_bc_agent_from_model(model, bc_params, no_waits)
    a1 = get_bc_agent_from_model(model, bc_params, no_waits)
    # Strip keys that AgentEvaluator does not accept before unpacking the rest
    del bc_params["data_params"], bc_params["mdp_fn_params"]
    a_eval = AgentEvaluator(**bc_params)
    ap = AgentPair(a0, a1)
    trajectories = a_eval.evaluate_agent_pair(ap, num_games=n_games, display=display)
    return trajectories
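A hedged usage sketch; `trained_model` and `bc_params` are placeholders that would come from the BC training code, and `bc_params` is assumed to contain at least the keys the function deletes plus the AgentEvaluator arguments it unpacks.

# Hypothetical call (trained_model and bc_params are placeholders)
trajs = eval_with_benchmarking_from_model(n_games=10,
                                          model=trained_model,
                                          bc_params=bc_params,
                                          no_waits=True)
print(np.mean(trajs['ep_returns']))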
Example 6
def evaluate_ppo_and_bc_models_for_layout(layout,
                                          num_rounds,
                                          bc_model_paths,
                                          ppo_bc_model_paths,
                                          seeds,
                                          best=False,
                                          display=False):
    assert len(seeds["bc_train"]) == len(seeds["bc_test"])
    ppo_bc_performance = defaultdict(lambda: defaultdict(list))

    agent_bc_test, bc_params = get_bc_agent_from_saved(
        bc_model_paths['test'][layout])
    ppo_bc_train_path = ppo_bc_model_paths['bc_train'][layout]
    ppo_bc_test_path = ppo_bc_model_paths['bc_test'][layout]
    evaluator = AgentEvaluator(mdp_params=bc_params["mdp_params"],
                               env_params=bc_params["env_params"])

    for seed_idx in range(len(seeds["bc_train"])):
        agent_ppo_bc_train, ppo_config = get_ppo_agent(
            ppo_bc_train_path, seeds["bc_train"][seed_idx], best=best)
        assert common_keys_equal(bc_params["mdp_params"],
                                 ppo_config["mdp_params"])

        # Out of curiosity: how well does the agent do when paired with itself?
        # ppo_and_ppo = evaluator.evaluate_agent_pair(AgentPair(agent_ppo_bc_train, agent_ppo_bc_train), num_games=max(int(num_rounds/2), 1), display=display)
        # avg_ppo_and_ppo = np.mean(ppo_and_ppo['ep_returns'])
        # ppo_bc_performance[layout]["PPO_BC_train+PPO_BC_train"].append(avg_ppo_and_ppo)

        # How well does it generalize to a new agent (BC_test) in simulation?
        ppo_and_bc = evaluator.evaluate_agent_pair(
            AgentPair(agent_ppo_bc_train, agent_bc_test),
            num_games=num_rounds,
            display=display)
        avg_ppo_and_bc = np.mean(ppo_and_bc['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_train+BC_test_0"].append(
            avg_ppo_and_bc)

        bc_and_ppo = evaluator.evaluate_agent_pair(
            AgentPair(agent_bc_test, agent_ppo_bc_train),
            num_games=num_rounds,
            display=display)
        avg_bc_and_ppo = np.mean(bc_and_ppo['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_train+BC_test_1"].append(
            avg_bc_and_ppo)

        # How well could we do if we knew the true model, BC_test?
        agent_ppo_bc_test, ppo_config = get_ppo_agent(
            ppo_bc_test_path, seeds["bc_test"][seed_idx], best=best)
        assert common_keys_equal(bc_params["mdp_params"],
                                 ppo_config["mdp_params"])

        ppo_and_bc = evaluator.evaluate_agent_pair(
            AgentPair(agent_ppo_bc_test, agent_bc_test),
            num_games=num_rounds,
            display=display)
        avg_ppo_and_bc = np.mean(ppo_and_bc['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_test+BC_test_0"].append(
            avg_ppo_and_bc)

        bc_and_ppo = evaluator.evaluate_agent_pair(
            AgentPair(agent_bc_test, agent_ppo_bc_test),
            num_games=num_rounds,
            display=display)
        avg_bc_and_ppo = np.mean(bc_and_ppo['ep_returns'])
        ppo_bc_performance[layout]["PPO_BC_test+BC_test_1"].append(
            avg_bc_and_ppo)

    return ppo_bc_performance
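A minimal usage sketch for evaluate_ppo_and_bc_models_for_layout; all layout names, paths, and seeds are hypothetical placeholders.

# Hypothetical call showing the expected shapes of the path and seed dictionaries
bc_model_paths = {"test": {"simple": "path/to/bc_test_simple"}}
ppo_bc_model_paths = {
    "bc_train": {"simple": "path/to/ppo_bc_train_simple"},
    "bc_test": {"simple": "path/to/ppo_bc_test_simple"},
}
seeds = {"bc_train": [0, 10], "bc_test": [0, 10]}  # must have equal lengths

results = evaluate_ppo_and_bc_models_for_layout("simple",
                                                num_rounds=20,
                                                bc_model_paths=bc_model_paths,
                                                ppo_bc_model_paths=ppo_bc_model_paths,
                                                seeds=seeds,
                                                best=True)
# One average episode return per seed, per pairing and ordering
print(results["simple"]["PPO_BC_train+BC_test_0"])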