Example #1
def get_base_ae(mdp_params,
                env_params,
                outer_shape=None,
                mdp_params_schedule_fn=None):
    """
    mdp_params: one set of fixed mdp parameters used by the environment
    env_params: env parameters (horizon, etc.)
    outer_shape: outer shape of the environment
    mdp_params_schedule_fn: the schedule for varying mdp params

    return: the base agent evaluator
    """
    assert mdp_params is None or mdp_params_schedule_fn is None, \
        "at least one of the two must be None"
    if isinstance(mdp_params, dict) and "layout_name" in mdp_params:
        ae = AgentEvaluator.from_layout_name(mdp_params=mdp_params,
                                             env_params=env_params)
    elif 'num_mdp' in env_params:
        if np.isinf(env_params['num_mdp']):
            ae = AgentEvaluator.from_mdp_params_infinite(
                mdp_params=mdp_params,
                env_params=env_params,
                outer_shape=outer_shape,
                mdp_params_schedule_fn=mdp_params_schedule_fn)
        else:
            ae = AgentEvaluator.from_mdp_params_finite(
                mdp_params=mdp_params,
                env_params=env_params,
                outer_shape=outer_shape,
                mdp_params_schedule_fn=mdp_params_schedule_fn)
    else:
        # should not reach this case
        raise NotImplementedError()
    return ae
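
The short sketch below is not part of the original example; it shows one plausible way to call get_base_ae for a single fixed layout, with the layout name and horizon chosen purely for illustration.

# Hedged usage sketch (illustrative values, not from the original snippet):
# with a "layout_name" in mdp_params, get_base_ae takes the
# AgentEvaluator.from_layout_name branch above.
base_ae = get_base_ae(mdp_params={"layout_name": "cramped_room"},
                      env_params={"horizon": 400})
env = base_ae.env  # underlying OvercookedEnv used for rollouts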
Example #2
    def test_scenario_3_yes_counter(self):
        # Asymmetric advantage scenario
        #
        # X X X X X O X X X X
        # S           X X P X
        # X         ↑H      X
        # D   X X X X!X X   X
        # X           →R    O
        # X X X X X X X X X X
        #
        # This test allows (5, 3) as the only usable counter

        mdp_params = {"layout_name": "scenario3"}
        mdp = OvercookedGridworld.from_layout_name(**mdp_params)
        start_state = mdp.get_standard_start_state()

        valid_counters = [(5, 3)]
        one_counter_params = {
            'start_orientations': False,
            'wait_allowed': False,
            'counter_goals': valid_counters,
            'counter_drop': valid_counters,
            'counter_pickup': [],
            'same_motion_goals': True
        }

        env_params = {"start_state_fn": lambda: start_state, "horizon": 1000}
        eva = AgentEvaluator.from_layout_name(mdp_params,
                                              env_params,
                                              mlam_params=one_counter_params,
                                              force_compute=force_compute)

        self.repetative_runs(eva)
Example #3
    def test_from_mdp_params_layout(self):
        for layout_name in self.layout_name_lst:
            original_mdp = OvercookedGridworld.from_layout_name(layout_name)
            ae = AgentEvaluator.from_layout_name(
                mdp_params={"layout_name": layout_name},
                env_params={"horizon": 400})
            ae_mdp = ae.env.mdp
            self.assertEqual(
                original_mdp, ae_mdp, "mdp with name " + layout_name +
                " experienced an inconsistency")
Example #4
    def test_save_load(self):
        # Train a quick self play agent for 2 iterations
        ex.run(
            config_updates={
                # Please feel free to modify the parameters below
                "results_dir": self.temp_results_dir,
                "experiment_name": "save_load_test",
                "layout_name": "cramped_room",
                "num_workers": 1,
                "train_batch_size": 800,
                "sgd_minibatch_size": 800,
                "num_training_iters": 2,
                "evaluation_interval": 10,
                "entropy_coeff_start": 0.0,
                "entropy_coeff_end": 0.0,
                "use_phi": False,
                "evaluation_display": False,
                "verbose": False
            },
            options={'--loglevel': 'ERROR'})

        # Kill all ray processes to ensure loading works in a vacuum
        ray.shutdown()

        # Where the agent is stored (this is kind of hardcoded, would like for it to be more easily obtainable)
        load_path = os.path.join(
            glob.glob(os.path.join(self.temp_results_dir,
                                   "save_load_test*"))[0], 'checkpoint_2',
            'checkpoint-2')

        # Load a dummy state
        mdp = OvercookedGridworld.from_layout_name("cramped_room")
        state = mdp.get_standard_start_state()

        # Ensure simple single-agent loading works
        agent_0 = load_agent(load_path)
        agent_0.reset()

        agent_1 = load_agent(load_path)
        agent_1.reset()

        # Ensure forward pass of policy network still works
        _, _ = agent_0.action(state)
        _, _ = agent_1.action(state)

        # Now let's load an agent pair and evaluate it
        agent_pair = load_agent_pair(load_path)
        ae = AgentEvaluator.from_layout_name(
            mdp_params={"layout_name": "cramped_room"},
            env_params={"horizon": 400})

        # We assume no runtime errors => success, no performance consistency check for now
        ae.evaluate_agent_pair(agent_pair, 1, info=False)
Example #5
def df_traj_to_python_joint_traj(traj_df, complete_traj=True):
    if len(traj_df) == 0:
        return None

    datapoint = traj_df.iloc[0]
    layout_name = datapoint['layout_name']
    agent_evaluator = AgentEvaluator.from_layout_name(
        mdp_params={"layout_name": layout_name},
        env_params={
            "horizon": 1250
        }  # Horizon of the MDP the trajectories originally came from
    )
    mdp = agent_evaluator.env.mdp
    env = agent_evaluator.env

    overcooked_states = [json_state_to_python_state(s) for s in traj_df.state]
    overcooked_actions = [
        json_joint_action_to_python_action(joint_action)
        for joint_action in traj_df.joint_action
    ]
    overcooked_rewards = list(traj_df.reward)

    assert sum(overcooked_rewards) == datapoint.score_total, \
        "Rewards didn't sum to the cumulative score; the trajectory df is probably corrupted or incomplete"

    trajectories = {
        "ep_observations": [overcooked_states],
        "ep_actions": [overcooked_actions],
        "ep_rewards": [overcooked_rewards],  # Individual (dense) reward values
        "ep_dones":
        [[False] * len(overcooked_states)],  # Individual done values
        "ep_infos": [{}] * len(overcooked_states),
        "ep_returns":
        [sum(overcooked_rewards)],  # Sum of dense rewards across each episode
        "ep_lengths": [len(overcooked_states)],  # Lengths of each episode
        "mdp_params": [mdp.mdp_params],
        "env_params": [env.env_params],
        "metadatas": {
            'player_0_id': [datapoint['player_0_id']],
            'player_1_id': [datapoint['player_1_id']],
            'env': [agent_evaluator.env]
        }
    }
    trajectories = {
        k: np.array(v) if k not in ["ep_actions", "metadatas"] else v
        for k, v in trajectories.items()
    }

    if complete_traj:
        agent_evaluator.check_trajectories(trajectories)
    return trajectories
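
A hedged usage sketch follows; it assumes you already have a pandas DataFrame holding the timesteps of a single logged game, with the columns referenced above (layout_name, state, joint_action, reward, score_total, player_0_id, player_1_id). None of these setup details come from the original snippet.

# Hedged usage sketch: `single_game_df` is a hypothetical DataFrame with one
# game's timesteps, ordered by time.
joint_traj = df_traj_to_python_joint_traj(single_game_df, complete_traj=False)
if joint_traj is not None:
    # One episode per call, so lengths and returns are length-1 arrays
    print(joint_traj["ep_lengths"][0], joint_traj["ep_returns"][0])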
Example #6
    def test_scenario_3_no_counter(self):
        # Asymmetric advantage scenario
        #
        # X X X X X O X X X X
        # S           X X P X
        # X         ↑H      X
        # D   X X X X!X X   X
        # X           →R    O
        # X X X X X X X X X X
        #
        # This test does not allow any counter use, relying on the default NO_COUNTER_PARAMS when calling from_layout_name

        mdp_params = {"layout_name": "scenario3"}
        mdp = OvercookedGridworld.from_layout_name(**mdp_params)
        start_state = mdp.get_standard_start_state()

        env_params = {"start_state_fn": lambda: start_state, "horizon": 1000}
        eva = AgentEvaluator.from_layout_name(mdp_params,
                                              env_params,
                                              force_compute=force_compute)

        self.repetative_runs(eva)
Example #7
    def setUp(self):
        self.agent_eval = AgentEvaluator.from_layout_name(
            {"layout_name": "cramped_room"}, {"horizon": 100})