def test_human_model_pair(self):
    trajs = self.agent_eval.evaluate_human_model_pair()
    try:
        AgentEvaluator.check_trajectories(trajs)
    except AssertionError as e:
        self.fail("Trajectories were not returned in standard format:\n{}".format(e))
Example n. 2
def test_rollouts(self):
    ap = AgentPair(RandomAgent(), RandomAgent())
    trajs = self.agent_eval.evaluate_agent_pair(ap, num_games=5)
    try:
        AgentEvaluator.check_trajectories(trajs)
    except AssertionError as e:
        self.fail("Trajectories were not returned in standard format:\n{}".format(e))
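The `self.agent_eval` fixture used by these tests is not shown on this page. A minimal sketch of how it might be set up, assuming the `AgentEvaluator(mdp_params, env_params)` constructor used in Example n. 6 below (the layout name and horizon are placeholder values):

from overcooked_ai_py.agents.benchmarking import AgentEvaluator

def setUp(self):
    # Hypothetical fixture; "cramped_room" and horizon=100 are placeholders
    self.agent_eval = AgentEvaluator(
        mdp_params={"layout_name": "cramped_room"},
        env_params={"horizon": 100},
    )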
Example n. 3
    def get_rollouts(self, agent_pair, num_games, display=False, dir=None, final_state=False, display_phi=False,
                     display_until=np.Inf, metadata_fn=None, metadata_info_fn=None, info=True):
        """
        Simulate `num_games` number rollouts with the current agent_pair and returns processed 
        trajectories.

        Returning excessive information to be able to convert trajectories to any required format 
        (baselines, stable_baselines, etc)

        metadata_fn returns some metadata information computed at the end of each trajectory based on
        some of the trajectory data.

        NOTE: this is the standard trajectories format used throughout the codebase
        """
        trajectories = { k:[] for k in self.DEFAULT_TRAJ_KEYS }
        metadata_fn = (lambda x: {}) if metadata_fn is None else metadata_fn
        metadata_info_fn = (lambda x: "") if metadata_info_fn is None else metadata_info_fn
        range_iterator = tqdm.trange(num_games, desc="", leave=True) if info else range(num_games)
        for i in range_iterator:
            agent_pair.set_mdp(self.mdp)

            rollout_info = self.run_agents(agent_pair, display=display, dir=dir, include_final_state=final_state,
                                           display_phi=display_phi, display_until=display_until)
            trajectory, time_taken, tot_rews_sparse, _tot_rews_shaped = rollout_info
            obs, actions, rews, dones, infos = trajectory.T[0], trajectory.T[1], trajectory.T[2], trajectory.T[3], trajectory.T[4]
            trajectories["ep_states"].append(obs)
            trajectories["ep_actions"].append(actions)
            trajectories["ep_rewards"].append(rews)
            trajectories["ep_dones"].append(dones)
            trajectories["ep_infos"].append(infos)
            trajectories["ep_returns"].append(tot_rews_sparse)
            trajectories["ep_lengths"].append(time_taken)
            trajectories["mdp_params"].append(self.mdp.mdp_params)
            trajectories["env_params"].append(self.env_params)
            trajectories["metadatas"].append(metadata_fn(rollout_info))

            # We do not need to regenerate the MDP when generating a series of rollouts on the same MDP:
            # regen_mdp=False keeps the same layout and starting positions
            # (if regen_mdp were True, reset() would call mdp_gen_fn to generate another layout & starting positions)
            self.reset(regen_mdp=False)
            agent_pair.reset()

            if info:
                mu, se = mean_and_std_err(trajectories["ep_returns"])
                description = "Avg rew: {:.2f} (std: {:.2f}, se: {:.2f}); avg len: {:.2f}; ".format(
                    mu, np.std(trajectories["ep_returns"]), se, np.mean(trajectories["ep_lengths"]))
                description += metadata_info_fn(trajectories["metadatas"])
                range_iterator.set_description(description)
                range_iterator.refresh()

        # Converting to numpy arrays
        trajectories = {k: np.array(v) for k, v in trajectories.items()}

        # Merging all metadata dictionaries, assumes same keys throughout all
        trajectories["metadatas"] = append_dictionaries(trajectories["metadatas"])

        # TODO: should probably transfer check methods over to Env class
        from overcooked_ai_py.agents.benchmarking import AgentEvaluator
        AgentEvaluator.check_trajectories(trajectories)
        return trajectories
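A hedged usage sketch of the hooks above (the `env` object and `agent_pair` are assumed to exist; `env` stands for the environment instance that defines get_rollouts): metadata_fn receives the (trajectory, time_taken, tot_rews_sparse, tot_rews_shaped) tuple returned by run_agents and must return a dict, while metadata_info_fn receives the list of per-episode dicts collected so far and returns a string appended to the progress-bar description.

import numpy as np

def shaped_return_metadata(rollout_info):
    # rollout_info is the tuple returned by run_agents
    _traj, _time_taken, _tot_rews_sparse, tot_rews_shaped = rollout_info
    return {"shaped_return": tot_rews_shaped}

def shaped_return_summary(metadatas):
    # metadatas is the list of per-episode metadata dicts accumulated so far
    return "avg shaped: {:.2f}".format(np.mean([m["shaped_return"] for m in metadatas]))

rollouts = env.get_rollouts(agent_pair, num_games=10,
                            metadata_fn=shaped_return_metadata,
                            metadata_info_fn=shaped_return_summary)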
Example n. 4
    def test_mdp_dynamics(self):
        traj_path = os.path.join(TESTING_DATA_DIR, 'test_mdp_dynamics', 'expected.json')

        # NOTE: uncomment the following line to recompute trajectories if MDP dynamics were deliberately updated
        # generate_serialized_trajectory(self.base_mdp, traj_path)

        test_trajectory = AgentEvaluator.load_traj_from_json(traj_path)
        AgentEvaluator.check_trajectories(test_trajectory, from_json=True)
Example n. 5
def test_common_mdp_jsons(self):
    traj_test_json_paths = iterate_over_files_in_dir("../common_tests/trajectory_tests/")
    for test_json_path in traj_test_json_paths:
        test_trajectory = AgentEvaluator.load_traj_from_json(test_json_path)
        try:
            AgentEvaluator.check_trajectories(test_trajectory)
        except AssertionError as e:
            self.fail("File {} failed with error:\n{}".format(test_json_path, e))
Example n. 6
def df_traj_to_python_joint_traj(traj_df, complete_traj=True):
    if len(traj_df) == 0:
        return None

    datapoint = traj_df.iloc[0]
    python_layout_name = JS_LAYOUT_NAME_TO_PYTHON_NAME[datapoint['layout_name']]
    # python_layout_name = datapoint['layout_name']
    agent_evaluator = AgentEvaluator(
        mdp_params={"layout_name": python_layout_name}, 
        env_params={"horizon": 1250}
    )
    mdp = agent_evaluator.env.mdp
    env = agent_evaluator.env

    overcooked_states = [json_state_to_python_state(mdp, s) for s in traj_df.state]
    overcooked_actions = [json_joint_action_to_python_action(joint_action) for joint_action in traj_df.joint_action]
    overcooked_rewards = list(traj_df.reward_norm)

    assert sum(overcooked_rewards) == datapoint.reward_norm_total, "Rewards didn't sum to the cumulative reward; the trajectory df is probably corrupted or incomplete"

    trajectories = {
        "ep_observations": [overcooked_states],
        "ep_actions": [overcooked_actions],
        "ep_rewards": [overcooked_rewards], # Individual (dense) reward values

        "ep_dones": [[False] * len(overcooked_states)], # Individual done values

        "ep_returns": [sum(overcooked_rewards)], # Sum of dense rewards across each episode
        "ep_returns_sparse": [sum(overcooked_rewards)], # Sum of sparse rewards across each episode
        "ep_lengths": [len(overcooked_states)], # Lengths of each episode
        "mdp_params": [mdp.mdp_params],
        "env_params": [env.env_params]
    }
    trajectories = {k: np.array(v) if k != "ep_actions" else v for k, v in trajectories.items() }

    if complete_traj:
        agent_evaluator.check_trajectories(trajectories)

    traj_metadata = {
        'worker_id': datapoint['workerid_num'],
        'round_num': datapoint['round_num'],
        'mdp': agent_evaluator.env.mdp
    }
    return trajectories, traj_metadata
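A hedged sketch of driving this converter over a full human-data DataFrame; the grouping columns are inferred from the metadata fields above, and the pickle path is purely hypothetical:

import pandas as pd

full_df = pd.read_pickle("human_trajectories.pickle")  # hypothetical path
joint_trajs = []
for _, traj_df in full_df.groupby(["workerid_num", "round_num"]):
    result = df_traj_to_python_joint_traj(traj_df, complete_traj=True)
    if result is not None:  # empty slices return None
        joint_trajs.append(result)  # (trajectories, traj_metadata) tuples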
Example n. 7
    def _get_trajectory_dict(self):
        trajectories = {k: [] for k in self.env.DEFAULT_TRAJ_KEYS}
        trajectory = np.array(self.trajectory)
        obs, actions, rews, dones, infos = trajectory.T[0], trajectory.T[1], trajectory.T[2], trajectory.T[3], trajectory.T[4]
        infos[-1] = self.env._add_episode_info(infos[-1])
        trajectories["ep_states"].append(obs)
        trajectories["ep_actions"].append(actions)
        trajectories["ep_rewards"].append(rews)
        trajectories["ep_dones"].append(dones)
        trajectories["ep_infos"].append(infos)
        trajectories["ep_returns"].append(self.score)
        trajectories["ep_lengths"].append(self.env.state.timestep)
        trajectories["mdp_params"].append(self.env.mdp.mdp_params)
        trajectories["env_params"].append(self.env.env_params)
        trajectories["metadatas"].append({})
        trajectories = {k: np.array(v) for k, v in trajectories.items()}

        AgentEvaluator.check_trajectories(trajectories)
        return trajectories
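The `self.trajectory` buffer itself is not shown here; based on the column unpacking above, each row is assumed to be a per-timestep 5-tuple appended during the game loop:

# Assumed row layout, inferred from trajectory.T[0..4] above (names are hypothetical)
self.trajectory.append((state, joint_action, reward, done, info))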
Example n. 8
    def get_rollouts(self,
                     agent_pair,
                     num_games,
                     display=False,
                     final_state=False,
                     agent_idx=0,
                     reward_shaping=0.0,
                     display_until=np.Inf,
                     info=True,
                     metadata_fn=lambda x: {}):
        """
        Simulate `num_games` number rollouts with the current agent_pair and returns processed 
        trajectories.

        Only returns the trajectories for one of the agents (the actions _that_ agent took), 
        namely the one indicated by `agent_idx`.

        Returning excessive information to be able to convert trajectories to any required format 
        (baselines, stable_baselines, etc)

        metadata_fn returns some metadata information computed at the end of each trajectory based on
        some of the trajectory data.

        NOTE: standard trajectories format used throughout the codebase
        """
        trajectories = {
            # With shape (n_episodes, game_len), where game_len might vary across games:
            "ep_observations": [],
            "ep_actions": [],
            "ep_rewards": [],  # Individual dense (= sparse + shaped * rew_shaping) reward values
            "ep_dones": [],  # Individual done values
            "ep_infos": [],

            # With shape (n_episodes, ):
            "ep_returns": [],  # Sum of sparse rewards across each episode
            "ep_lengths": [],  # Lengths of each episode
            "mdp_params": [],  # Custom MDP params for each episode
            "env_params": [],  # Custom Env params for each episode

            # Custom metadata key-value pairs; the final data type is a dictionary with a
            # similar format to trajectories
            "metadatas": []
        }

        range_fn = tqdm.trange if info else range
        for i in range_fn(num_games):
            agent_pair.set_mdp(self.mdp)

            rollout_info = self.run_agents(agent_pair,
                                           display=display,
                                           include_final_state=final_state,
                                           display_until=display_until)
            trajectory, time_taken, tot_rews_sparse, tot_rews_shaped = rollout_info
            obs, actions, rews, dones, infos = trajectory.T[0], trajectory.T[1], trajectory.T[2], trajectory.T[3], trajectory.T[4]
            trajectories["ep_observations"].append(obs)
            trajectories["ep_actions"].append(actions)
            trajectories["ep_rewards"].append(rews)
            trajectories["ep_dones"].append(dones)
            trajectories["ep_infos"].append(infos)
            trajectories["ep_returns"].append(tot_rews_sparse)
            trajectories["ep_lengths"].append(time_taken)
            trajectories["mdp_params"].append(self.mdp.mdp_params)
            trajectories["env_params"].append(self.env_params)
            trajectories["metadatas"].append(metadata_fn(rollout_info))

            self.reset()
            agent_pair.reset()

        mu, se = mean_and_std_err(trajectories["ep_returns"])
        if info:
            print(
                "Avg reward {:.2f} (std: {:.2f}, se: {:.2f}) over {} games of avg length {}"
                .format(mu, np.std(trajectories["ep_returns"]), se, num_games,
                        np.mean(trajectories["ep_lengths"])))

        # Converting to numpy arrays
        trajectories = {k: np.array(v) for k, v in trajectories.items()}

        # Merging all metadata dictionaries, assumes same keys throughout all
        trajectories["metadatas"] = merge_dictionaries(
            trajectories["metadatas"])

        # TODO: should probably transfer check methods over to Env class
        from overcooked_ai_py.agents.benchmarking import AgentEvaluator
        AgentEvaluator.check_trajectories(trajectories)
        return trajectories
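Note that this older variant stores per-timestep states under "ep_observations" (the variant in Example n. 3 uses "ep_states") and merges metadata with merge_dictionaries rather than append_dictionaries. A hedged sketch of inspecting the returned dict, with `env` and `agent_pair` assumed to exist:

rollouts = env.get_rollouts(agent_pair, num_games=5, info=False)
print(rollouts["ep_returns"])           # one sparse return per episode
print(rollouts["ep_observations"][0])   # per-timestep states of the first episode
print(rollouts["ep_lengths"].mean())    # average episode length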