Example #1
    def test_multiple_mdp_env(self):
        mdp0 = OvercookedGridworld.from_layout_name("cramped_room")
        mdp1 = OvercookedGridworld.from_layout_name("counter_circuit")
        mdp_fn = lambda: np.random.choice([mdp0, mdp1])

        # Default env
        env = OvercookedEnv(mdp_fn, horizon=100)
        env.get_rollouts(self.rnd_agent_pair, 5)
Example #2
    def mdp_gen_fn_from_dict(mdp_params={},
                             mdp_choices=None,
                             size_bounds=((4, 7), (4, 7)),
                             prop_empty=(0.6, 0.8),
                             prop_feats=(0.1, 0.2),
                             display=False):
        """
        Returns an MDP generator with the passed in properties.

        mdp_choices: selects MDP randomly among choices

        OR (if mdp_choices is None)

        size_bounds: (min_layout_size, max_layout_size)
        prop_empty: (min, max) proportion of empty space in generated layout
        prop_feats: (min, max) proportion of counters with features on them
        """

        if "layout_name" in mdp_params.keys(
        ) and mdp_params["layout_name"] is not None:
            mdp = OvercookedGridworld.from_layout_name(**mdp_params)
            mdp_generator_fn = lambda: mdp

        elif mdp_choices is not None:
            assert type(mdp_choices) is list

            # If list of MDPs, randomly choose one at each reset
            mdp_sizes = []
            for mdp_name in mdp_choices:
                mdp = OvercookedGridworld.from_layout_name(
                    mdp_name, **mdp_params)
                mdp_sizes.append([mdp.width, mdp.height])
            widths, heights = np.array(mdp_sizes).T
            min_padding = max(widths), max(heights)

            def mdp_generator_fn():
                chosen_mdp = np.random.choice(mdp_choices)
                mdp = OvercookedGridworld.from_layout_name(
                    chosen_mdp, **mdp_params)
                lg = LayoutGenerator(min_padding, mdp_params)
                mdp_padded = lg.padded_mdp(mdp)
                return mdp_padded
        else:
            min_padding = (size_bounds[0][1], size_bounds[1][1])
            layout_generator = LayoutGenerator(min_padding, mdp_params)
            mdp_generator_fn = lambda: layout_generator.make_disjoint_sets_layout(
                inner_shape=[rnd_int_uniform(*dim) for dim in size_bounds],
                prop_empty=rnd_uniform(*prop_empty),
                prop_features=rnd_uniform(*prop_feats),
                display=display)

        return mdp_generator_fn
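A minimal usage sketch for the generator above, assuming this method is exposed as LayoutGenerator.mdp_gen_fn_from_dict (as in the AgentEvaluator example further down) and that the named layouts exist in the layout bank:

# Fixed layout: the returned generator always produces the same MDP
fixed_fn = LayoutGenerator.mdp_gen_fn_from_dict(mdp_params={"layout_name": "cramped_room"})

# Random choice among named layouts, padded to a common footprint on each call
choice_fn = LayoutGenerator.mdp_gen_fn_from_dict(mdp_choices=["cramped_room", "counter_circuit"])
mdp = choice_fn()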
def evaluate_layout_loss_for_pbt_models(pbt_model_paths,
                                        layout_name,
                                        trajs,
                                        eps,
                                        seeds,
                                        best=True):
    layout_losses = defaultdict(dict)

    pbt_save_dir = PBT_DATA_DIR + pbt_model_paths[layout_name] + "/"
    pbt_config = load_dict_from_txt(pbt_save_dir + "config")

    for seed in seeds:
        reset_tf()
        agent_pbt = get_pbt_agent_from_config(pbt_save_dir,
                                              pbt_config["sim_threads"],
                                              seed=seed,
                                              agent_idx=0,
                                              best=best)
        agent_pbt.action_probs = True
        agent_pbt.set_mdp(
            OvercookedGridworld.from_layout_name(**pbt_config["mdp_params"]))

        losses, accuracies = get_trajs_losses_for_model(trajs, agent_pbt, eps)
        key = "{}_seed{}".format(layout_name, seed)
        layout_losses[key]['losses'] = losses
        layout_losses[key]['accuracies'] = accuracies
    return layout_losses
def get_bc_agent_from_model(model, bc_params, no_waits=False):
    mdp = OvercookedGridworld.from_layout_name(**bc_params["mdp_params"])
    mlp = MediumLevelPlanner.from_pickle_or_compute(mdp, NO_COUNTERS_PARAMS, force_compute=False)
    
    def encoded_state_policy(observations, include_waits=True, stochastic=False):
        action_probs_n = model.action_probability(observations)

        if not include_waits:
            # Drop the STAY action and renormalize the remaining probabilities
            action_probs_n = ImitationAgentFromPolicy.remove_indices_and_renormalize(
                action_probs_n, [Action.ACTION_TO_INDEX[Direction.STAY]])

        if stochastic:
            # Sample one action index per observation instead of returning the distribution
            return [np.random.choice(len(action_probs_n[i]), p=action_probs_n[i])
                    for i in range(len(action_probs_n))]
        return action_probs_n

    def state_policy(mdp_states, agent_indices, include_waits, stochastic=False):
        # encode_fn = lambda s: mdp.preprocess_observation(s)
        encode_fn = lambda s: mdp.featurize_state(s, mlp)

        obs = []
        for agent_idx, s in zip(agent_indices, mdp_states):
            ob = encode_fn(s)[agent_idx]
            obs.append(ob)
        obs = np.array(obs)
        action_probs = encoded_state_policy(obs, include_waits, stochastic)
        return action_probs

    return ImitationAgentFromPolicy(state_policy, encoded_state_policy, no_waits=no_waits, mlp=mlp)
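A hedged usage sketch; the trained model and its bc_params are assumed to come from the behavior-cloning training utilities, which are not shown on this page:

# Hypothetical: `model` and `bc_params` were produced by a prior BC training run
bc_agent = get_bc_agent_from_model(model, bc_params, no_waits=True)
bc_agent.set_mdp(OvercookedGridworld.from_layout_name(**bc_params["mdp_params"]))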
Example #5
    def _check_trajectories_dynamics(trajectories):
        for idx in range(len(trajectories["ep_observations"])):
            states = trajectories["ep_observations"][idx]
            actions = trajectories["ep_actions"][idx]
            rewards = trajectories["ep_rewards"][idx]
            mdp_params = trajectories["mdp_params"][idx]
            env_params = trajectories["env_params"][idx]

            assert len(states) == len(actions) == len(rewards), \
                "# states {}\t# actions {}\t# rewards {}".format(
                    len(states), len(actions), len(rewards))

            # Checking that actions would give rise to same behaviour in current MDP
            simulation_env = OvercookedEnv(
                OvercookedGridworld.from_layout_name(**mdp_params),
                **env_params)
            for i in range(len(states) - 1):
                curr_state = states[i]
                simulation_env.state = curr_state

                next_state, reward, done, info = simulation_env.step(actions[i])

                assert states[i + 1] == next_state, \
                    "States differed (expected vs actual): {}".format(
                        simulation_env.display_states(states[i + 1], next_state))
                assert rewards[i] == reward, "{} \t {}".format(rewards[i], reward)
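A hedged sketch of how such a trajectories dict is typically produced and checked; it assumes this static check lives on AgentEvaluator (which the self-less signature suggests) and that an environment and agent pair exist as in Example #1:

trajectories = env.get_rollouts(agent_pair, 5)
AgentEvaluator._check_trajectories_dynamics(trajectories)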
Example #6
    def __init__(self,
                 mdp_params,
                 env_params={},
                 mdp_fn_params=None,
                 force_compute=False,
                 mlp_params=NO_COUNTERS_PARAMS,
                 debug=False):
        """
        mdp_params (dict): params for creation of an OvercookedGridworld instance through the `from_layout_name` method
        env_params (dict): params for creation of an OvercookedEnv
        mdp_fn_params (dict): params to set up random MDP generation
        force_compute (bool): whether to re-compute the MediumLevelPlanner even if a matching pickle file is found
        mlp_params (dict): params for MediumLevelPlanner
        """
        assert type(mdp_params) is dict, "mdp_params must be a dictionary"

        if mdp_fn_params is None:
            self.variable_mdp = False
            self.mdp_fn = lambda: OvercookedGridworld.from_layout_name(
                **mdp_params)
        else:
            self.variable_mdp = True
            self.mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(
                mdp_params, **mdp_fn_params)

        self.env = OvercookedEnv(self.mdp_fn, **env_params)
        self.force_compute = force_compute
        self.debug = debug
        self.mlp_params = mlp_params
        self._mlp = None
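A hedged construction sketch using only the parameters documented above (the layout name is borrowed from other examples on this page):

fixed_eva = AgentEvaluator(mdp_params={"layout_name": "cramped_room"},
                           env_params={"horizon": 400})

# Passing any mdp_fn_params dict (even an empty one) switches to random MDP generation
random_eva = AgentEvaluator(mdp_params={}, env_params={"horizon": 400}, mdp_fn_params={})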
Example #7
    def activate(self):
        super(OvercookedGame, self).activate()

        # Sanity check at start of each game
        if not self.npc_players.union(self.human_players) == set(self.players):
            raise ValueError("Inconsistent State")

        self.curr_layout = self.layouts.pop()
        # Keep the gridworld on the instance; it is referenced again below via self.mdp
        self.mdp = OvercookedGridworld.from_layout_name(self.curr_layout,
                                                        **self.mdp_params)
        self.env = OvercookedEnv.from_mdp(self.mdp)
        if self.show_potential:
            self.mp = MotionPlanner.from_pickle_or_compute(
                self.mdp, counter_goals=NO_COUNTERS_PARAMS)

        if self.show_potential:
            self.phi = self.mdp.potential_function(self.state,
                                                   self.mp,
                                                   gamma=0.99)
        self.start_time = time()
        self.curr_tick = 0
        self.score = 0
        self.threads = []
        for npc_policy in self.npc_policies:
            self.npc_policies[npc_policy].reset()
            self.npc_state_queues[npc_policy].put(self.state)
            t = Thread(target=self.npc_policy_consumer, args=(npc_policy, ))
            self.threads.append(t)
            t.start()
    def test_scenario_4(self):
        # Yet another asymmetric advantage scenario
        #
        # X X X X X O X X X X
        # S             X P=X
        # D         ↑H      X
        # X X X X X X X X   X
        # X X X X X X →R    O
        # X X X X X X X X X X
        #
        # Similar to scenario 3, just keeping for reference for now.
        # In this case we only have human suboptimality, and R
        # assuming H optimality does not end up being a problem
        mdp_params = {"layout_name": "scenario4", "cook_time": 5}
        mdp = OvercookedGridworld.from_layout_name(**mdp_params)

        start_state = mdp.get_standard_start_state()
        start_state.objects = {(8, 1): Obj('soup', (8, 1), ('onion', 2, 5))}
        start_state.order_list = ['onion']

        env_params = {"start_state_fn": lambda: start_state, "horizon": 1000}
        eva = AgentEvaluator(mdp_params,
                             env_params,
                             force_compute=force_compute)
        self.compare_times(eva)
Example #9
 def test_one_player_env(self):
     mdp = OvercookedGridworld.from_layout_name("cramped_room_single")
     env = OvercookedEnv(mdp, horizon=12)
     a0 = FixedPlanAgent([stay, w, w, e, e, n, e, interact, w, n, interact])
     ag = AgentGroup(a0)
     env.run_agents(ag, display=False)
     self.assertEqual(env.state.players_pos_and_or, (((2, 1), (0, -1)), ))
Example #10
 def test_file_constructor(self):
     mdp = OvercookedGridworld.from_layout_name('corridor')
     expected_start_state = OvercookedState(
         [PlayerState((3, 1), Direction.NORTH), PlayerState((10, 1), Direction.NORTH)], {},
         all_orders=[{ "ingredients" : ["onion", "onion", "onion"]}])
     actual_start_state = mdp.get_standard_start_state()
     self.assertEqual(actual_start_state, expected_start_state, '\n' + str(actual_start_state) + '\n' + str(expected_start_state))
    def test_from_mdp_lst_biased(self):
        mdp_lst = [
            OvercookedGridworld.from_layout_name(name)
            for name in self.layout_name_short_lst
        ]
        ae = AgentEvaluator.from_mdp_lst(mdp_lst=mdp_lst,
                                         env_params={"horizon": 400},
                                         sampling_freq=self.biased)
        counts = {}

        for _ in range(self.num_reset):
            ae.env.reset(regen_mdp=True)
            if ae.env.mdp.layout_name in counts:
                counts[ae.env.mdp.layout_name] += 1
            else:
                counts[ae.env.mdp.layout_name] = 1

        # construct the ground truth
        gt = {
            self.layout_name_short_lst[i]: self.biased[i]
            for i in range(len(self.layout_name_short_lst))
        }

        for k, v in counts.items():
            self.assertAlmostEqual(gt[k], v / self.num_reset, 2,
                                   "more than 2 places off for " + k)
Example #12
    def test_scenario_3_yes_counter(self):
        # Asymmetric advantage scenario
        #
        # X X X X X O X X X X
        # S           X X P X
        # X         ↑H      X
        # D   X X X X!X X   X
        # X           →R    O
        # X X X X X X X X X X
        #
        # This test allows (5, 3) as the only usable counter

        mdp_params = {"layout_name": "scenario3"}
        mdp = OvercookedGridworld.from_layout_name(**mdp_params)
        start_state = mdp.get_standard_start_state()

        valid_counters = [(5, 3)]
        one_counter_params = {
            'start_orientations': False,
            'wait_allowed': False,
            'counter_goals': valid_counters,
            'counter_drop': valid_counters,
            'counter_pickup': [],
            'same_motion_goals': True
        }

        env_params = {"start_state_fn": lambda: start_state, "horizon": 1000}
        eva = AgentEvaluator.from_layout_name(mdp_params,
                                              env_params,
                                              mlam_params=one_counter_params,
                                              force_compute=force_compute)

        self.repetative_runs(eva)
Example #13
 def test_get_encoding_function(self):
     mdp = OvercookedGridworld.from_layout_name("cramped_room")
     mdp_params = mdp.mdp_params
     env_params = {"horizon": 100}
     env = OvercookedEnv.from_mdp(mdp, **env_params)
     state = mdp.get_standard_start_state()
     example_encoding_fns_names = ["mdp.multi_hot_orders_encoding", "env.featurize_state_mdp", "env.lossless_state_encoding_mdp"]
     example_encoding_fns = [mdp.multi_hot_orders_encoding, env.featurize_state_mdp, env.lossless_state_encoding_mdp]
     for encoding_fn_name, encoding_fn in zip(example_encoding_fns_names, example_encoding_fns):
         encoding_fn_from_name = get_encoding_function(encoding_fn_name, env=env)
         self.assertEqual(encoding_fn_from_name, encoding_fn)
         if encoding_fn_name.split(".")[0] == "mdp":
             encoding_fn_from_name = get_encoding_function(encoding_fn_name, mdp=mdp)
             self.assertEqual(encoding_fn_from_name, encoding_fn)
             encoding_fn_from_name = get_encoding_function(encoding_fn_name, mdp_params=mdp_params)
             # compare names as new instance of mdp is created
             self.assertEqual(encoding_fn_from_name.__name__, encoding_fn.__name__)
         else:
             encoding_fn_from_name = get_encoding_function(encoding_fn_name, env_params=env_params, mdp_params=mdp_params)
             # compare names as new instance of env is created
             self.assertEqual(encoding_fn_from_name.__name__, encoding_fn.__name__) 
     
     expected_encoded_state_dict = {str(i): fn(state) for i, fn in enumerate(example_encoding_fns)}
     actual_encoded_state_dict = get_encoding_function({str(i): fn_name for i, fn_name in enumerate(example_encoding_fns_names)}, env=env)(state)
     self.assertEqual(expected_encoded_state_dict.keys(), actual_encoded_state_dict.keys())
     for k in expected_encoded_state_dict.keys():
         self.assertTrue(np.array_equal(expected_encoded_state_dict[k], actual_encoded_state_dict[k]))
 def test_scenario_1(self):
     # Myopic corridor collision
     #
     # X X X X X O X D X X X X X
     # X   ↓Ho     X           X
     # X     X X X X X X X ↓R  X
     # X                       X
     # X S X X X X X X X X P P X
     #
     # H on left with onion, further from the tunnel entrance than R.
     # Optimal planner tells R to go first and that H will wait
     # for R to pass. H however, starts going through the tunnel
     # and they get stuck. The H plan is a bit extreme (it would probably
     # realize that it should retrace its steps at some point)
     scenario_1_mdp = OvercookedGridworld.from_layout_name(
         'small_corridor', start_order_list=['any'], cook_time=5)
     mlp = MediumLevelPlanner.from_pickle_or_compute(
         scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     a0 = GreedyHumanModel(mlp)
     a1 = CoupledPlanningAgent(mlp)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((2, 1), s, Obj('onion', (2, 1))),
          P((10, 2), s)], {},
         order_list=['onion'])
     env = OvercookedEnv.from_mdp(scenario_1_mdp,
                                  start_state_fn=lambda: start_state)
     env.run_agents(agent_pair, include_final_state=True, display=DISPLAY)
 def mdp_gen_fn_from_dict(mdp_params,
                          outer_shape=None,
                          mdp_params_schedule_fn=None):
     """
     mdp_params: one fixed set of mdp parameters used by the environment
     outer_shape: outer shape of the environment
     mdp_params_schedule_fn: the schedule for varying mdp params
     """
     # if outer_shape is not defined, we must be using one of the default layouts from the layout name bank
     if outer_shape is None:
         assert type(mdp_params) is dict and "layout_name" in mdp_params
         mdp = OvercookedGridworld.from_layout_name(**mdp_params)
         mdp_fn = lambda _ignored: mdp
     else:
         # there is no schedule, we are using the same set of mdp_params all the time
         if mdp_params_schedule_fn is None:
             assert mdp_params is not None
             mdp_pg = MDPParamsGenerator.from_fixed_param(
                 mdp_params_always=mdp_params)
         else:
         assert mdp_params is None, "please remove mdp_params, because " \
                                    "mdp_params_schedule_fn exists and the schedule " \
                                    "function will always be used when it is provided"
             mdp_pg = MDPParamsGenerator(
                 params_schedule_fn=mdp_params_schedule_fn)
         lg = LayoutGenerator(mdp_pg, outer_shape)
         mdp_fn = lg.generate_padded_mdp
     return mdp_fn
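A minimal sketch of the fixed-layout branch above; the generation branch needs LayoutGenerator parameters that are not shown in this snippet, so it is omitted:

mdp_fn = mdp_gen_fn_from_dict({"layout_name": "cramped_room"})
mdp = mdp_fn(None)  # the argument is ignored when the layout is fixed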
    def test_scenario_1_s(self):
        # Smaller version of the corridor collisions scenario above
        # to facilitate DRL training
        scenario_1_mdp = OvercookedGridworld.from_layout_name(
            'scenario1_s', start_order_list=['any'], cook_time=5)
        mlp = MediumLevelPlanner.from_pickle_or_compute(
            scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
        a0 = GreedyHumanModel(mlp)
        a1 = CoupledPlanningAgent(mlp)
        agent_pair = AgentPair(a0, a1)
        start_state = OvercookedState(
            [P((2, 1), s, Obj('onion', (2, 1))),
             P((4, 2), s)], {},
            order_list=['onion'])
        env = OvercookedEnv.from_mdp(scenario_1_mdp,
                                     start_state_fn=lambda: start_state)
        trajectory, time_taken_hr, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)
        env.reset()

        print("\n" * 5)
        print("-" * 50)

        a0 = CoupledPlanningAgent(mlp)
        a1 = CoupledPlanningAgent(mlp)
        agent_pair = AgentPair(a0, a1)
        trajectory, time_taken_rr, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)

        print("H+R time taken: ", time_taken_hr)
        print("R+R time taken: ", time_taken_rr)
        self.assertGreater(time_taken_hr, time_taken_rr)
Example #18
 def setUp(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     self.mlp = MediumLevelPlanner.from_pickle_or_compute(
         self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True)
     self.env = OvercookedEnv(self.base_mdp, **DEFAULT_ENV_PARAMS)
     self.rnd_agent_pair = AgentPair(GreedyHumanModel(self.mlp),
                                     GreedyHumanModel(self.mlp))
     np.random.seed(0)
Example #19
 def test_display_phi(self):
     mdp0 = OvercookedGridworld.from_layout_name("cramped_room")
     mdp_fn = lambda _ignored: mdp0
     env = OvercookedEnv(mdp_fn, horizon=20)
     env.get_rollouts(self.rnd_agent_pair,
                      1,
                      display=True,
                      display_phi=True)
Example #20
 def mdps_and_envs_from_trajectories(trajectories):
     mdps, envs = [], []
     for idx in range(len(trajectories["ep_lengths"])):
         mdp_params, env_params = trajectories["mdp_params"][idx], trajectories["env_params"][idx]
         mdp = OvercookedGridworld.from_layout_name(**mdp_params)
         env = OvercookedEnv(mdp, **env_params)
         mdps.append(mdp)
         envs.append(env)
     return mdps, envs
Example #21
 def test_starting_obj_randomization(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     start_state_fn = self.base_mdp.get_random_start_state_fn(random_start_pos=False, rnd_obj_prob_thresh=0.8)
     env = OvercookedEnv.from_mdp(self.base_mdp, start_state_fn)
     start_state = env.state.all_objects_list
     for _ in range(3):
         env.reset()
         curr_terrain = env.state.all_objects_list
         self.assertFalse(np.array_equal(start_state, curr_terrain))
 def test_from_mdp(self):
     for layout_name in self.layout_name_lst:
         orignal_mdp = OvercookedGridworld.from_layout_name(layout_name)
         ae = AgentEvaluator.from_mdp(mdp=orignal_mdp,
                                      env_params={"horizon": 400})
         ae_mdp = ae.env.mdp
         self.assertEqual(
             orignal_mdp, ae_mdp, "mdp with name " + layout_name +
             " experienced an inconsistency")
def init_gym_env(bc_params):
    env_setup_params = copy.deepcopy(bc_params)
    del env_setup_params["data_params"]  # Not necessary for setting up env
    mdp = OvercookedGridworld.from_layout_name(**bc_params["mdp_params"])
    env = OvercookedEnv(mdp, **bc_params["env_params"])
    gym_env = gym.make("Overcooked-v0")
    
    mlp = MediumLevelPlanner.from_pickle_or_compute(mdp, NO_COUNTERS_PARAMS, force_compute=False)
    gym_env.custom_init(env, featurize_fn=lambda x: mdp.featurize_state(x, mlp))
    return gym_env
Example #24
 def test_file_constructor(self):
     mdp = OvercookedGridworld.from_layout_name('corridor')
     expected_start_state = OvercookedState(
         [PlayerState((3, 1), Direction.NORTH),
          PlayerState((10, 1), Direction.NORTH)],
         {}, order_list=None)
     actual_start_state = mdp.get_standard_start_state()
     self.assertEqual(
         actual_start_state, expected_start_state,
         '\n' + str(actual_start_state) + '\n' + str(expected_start_state))
Example #25
 def test_starting_position_randomization(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     start_state_fn = self.base_mdp.get_random_start_state_fn(
         random_start_pos=True, rnd_obj_prob_thresh=0.0)
     env = OvercookedEnv(self.base_mdp, start_state_fn)
     start_state = env.state.players_pos_and_or
     for _ in range(3):
         env.reset()
         print(env)
         curr_terrain = env.state.players_pos_and_or
         self.assertFalse(np.array_equal(start_state, curr_terrain))
Example #26
    def test_save_load(self):
        # Train a quick self play agent for 2 iterations
        ex.run(
            config_updates={
                # Please feel free to modify the parameters below
                "results_dir": self.temp_results_dir,
                "experiment_name": "save_load_test",
                "layout_name": "cramped_room",
                "num_workers": 1,
                "train_batch_size": 800,
                "sgd_minibatch_size": 800,
                "num_training_iters": 2,
                "evaluation_interval": 10,
                "entropy_coeff_start": 0.0,
                "entropy_coeff_end": 0.0,
                "use_phi": False,
                "evaluation_display": False,
                "verbose": False
            },
            options={'--loglevel': 'ERROR'})

        # Kill all ray processes to ensure loading works in a vacuum
        ray.shutdown()

        # Where the agent is stored (this is kind of hardcoded, would like for it to be more easily obtainable)
        load_path = os.path.join(
            glob.glob(os.path.join(self.temp_results_dir,
                                   "save_load_test*"))[0], 'checkpoint_2',
            'checkpoint-2')

        # Load a dummy state
        mdp = OvercookedGridworld.from_layout_name("cramped_room")
        state = mdp.get_standard_start_state()

        # Ensure simple single-agent loading works
        agent_0 = load_agent(load_path)
        agent_0.reset()

        agent_1 = load_agent(load_path)
        agent_1.reset()

        # Ensure forward pass of policy network still works
        _, _ = agent_0.action(state)
        _, _ = agent_1.action(state)

        # Now let's load an agent pair and evaluate it
        agent_pair = load_agent_pair(load_path)
        ae = AgentEvaluator.from_layout_name(
            mdp_params={"layout_name": "cramped_room"},
            env_params={"horizon": 400})

        # We assume no runtime errors => success, no performance consistency check for now
        ae.evaluate_agent_pair(agent_pair, 1, info=False)
Example #27
def ppo_run(params):

    create_dir_if_not_exists(params["SAVE_DIR"])
    save_pickle(params, params["SAVE_DIR"] + "config")

    #############
    # PPO SETUP #
    #############

    train_infos = []

    for seed in params["SEEDS"]:
        reset_tf()
        set_global_seed(seed)

        curr_seed_dir = params["SAVE_DIR"] + "seed" + str(seed) + "/"
        create_dir_if_not_exists(curr_seed_dir)

        save_pickle(params, curr_seed_dir + "config")

        print("Creating env with params", params)
        # Configure mdp
        
        mdp = OvercookedGridworld.from_layout_name(**params["mdp_params"])
        env = OvercookedEnv(mdp, **params["env_params"])
        mlp = MediumLevelPlanner.from_pickle_or_compute(mdp, NO_COUNTERS_PARAMS, force_compute=True) 

        # Configure gym env
        gym_env = get_vectorized_gym_env(
            env, 'Overcooked-v0', featurize_fn=lambda x: mdp.lossless_state_encoding(x), **params
        )
        gym_env.self_play_randomization = 0 if params["SELF_PLAY_HORIZON"] is None else 1
        gym_env.trajectory_sp = params["TRAJECTORY_SELF_PLAY"]
        gym_env.update_reward_shaping_param(1 if params["mdp_params"]["rew_shaping_params"] != 0 else 0)

        configure_other_agent(params, gym_env, mlp, mdp)

        # Create model
        with tf.device('/device:GPU:{}'.format(params["GPU_ID"])):
            model = create_model(gym_env, "ppo_agent", **params)

        # Train model
        params["CURR_SEED"] = seed
        train_info = update_model(gym_env, model, **params)
        
        # Save model
        save_ppo_model(model, curr_seed_dir + model.agent_name)
        print("Saved training info at", curr_seed_dir + "training_info")
        save_pickle(train_info, curr_seed_dir + "training_info")
        train_infos.append(train_info)
    
    return train_infos
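The keys that ppo_run reads directly can be collected from the body above. A hedged skeleton follows; the values are placeholders, and get_vectorized_gym_env, create_model and update_model consume additional keys forwarded via **params:

params = {
    "SAVE_DIR": "ppo_runs/cramped_room/",
    "SEEDS": [0],
    "mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": 0},
    "env_params": {"horizon": 400},
    "SELF_PLAY_HORIZON": None,
    "TRAJECTORY_SELF_PLAY": True,
    "GPU_ID": 0,
    # ...plus whatever the model and training helpers expect
}
train_infos = ppo_run(params)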
 def from_layout_name(mdp_params,
                      env_params,
                      force_compute=False,
                      mlam_params=NO_COUNTERS_PARAMS,
                      debug=False):
     """
     mdp_params (dict): params for creation of an OvercookedGridworld instance through the `from_layout_name` method
     For information on the rest of the params, please refer to the __init__ method above
     """
     assert type(mdp_params) is dict and "layout_name" in mdp_params
     mdp = OvercookedGridworld.from_layout_name(**mdp_params)
     return AgentEvaluator.from_mdp(mdp, env_params, force_compute,
                                    mlam_params, debug)
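A hedged sketch: the two constructors below should yield evaluators backed by the same MDP (from_mdp is used the same way elsewhere on this page):

ae1 = AgentEvaluator.from_layout_name({"layout_name": "cramped_room"}, {"horizon": 400})
ae2 = AgentEvaluator.from_mdp(
    mdp=OvercookedGridworld.from_layout_name(layout_name="cramped_room"),
    env_params={"horizon": 400})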
Example #29
 def test_four_player_env_fixed(self):
     mdp = OvercookedGridworld.from_layout_name("multiplayer_schelling")
     assert mdp.num_players == 4
     env = OvercookedEnv.from_mdp(mdp, horizon=16)
     a0 = FixedPlanAgent([stay, w, w])
     a1 = FixedPlanAgent([stay, stay, e, e, n, n, n, e, interact, n, n, w, w, w, n, interact, e])
     a2 = FixedPlanAgent([stay, w, interact, n, n, e, e, e, n, e, n, interact, w])
     a3 = FixedPlanAgent([e, interact, n, n, w, w, w, n, interact, e, s])
     ag = AgentGroup(a0, a1, a2, a3)
     env.run_agents(ag, display=False)
     self.assertEqual(
         env.state.players_pos_and_or,
         (((1, 1), (-1, 0)), ((3, 1), (0, -1)), ((2, 1), (-1, 0)), ((4, 2), (0, 1)))
     )
Example #30
    def test_from_mdp_lst_uniform(self):
        mdp_lst = [OvercookedGridworld.from_layout_name(name) for name in self.layout_name_short_lst]
        ae = AgentEvaluator.from_mdp_lst(mdp_lst=mdp_lst, env_params={"horizon": 400}, sampling_freq=[0.2, 0.2, 0.2, 0.2, 0.2])
        counts = {}

        for _ in range(self.num_reset):
            ae.env.reset(regen_mdp=True)
            if ae.env.mdp.layout_name in counts:
                counts[ae.env.mdp.layout_name] += 1
            else:
                counts[ae.env.mdp.layout_name] = 1

        for k, v in counts.items():
            self.assertAlmostEqual(0.2, v/self.num_reset, 2, "more than 2 places off for " + k)