def test_constructor(self):
    try:
        OvercookedEnv.from_mdp(self.base_mdp, horizon=10)
    except Exception as e:
        self.fail("Failed to instantiate OvercookedEnv:\n{}".format(e))

    with self.assertRaises(TypeError):
        OvercookedEnv.from_mdp(self.base_mdp, **{"invalid_env_param": None})
def activate(self):
    super(OvercookedGame, self).activate()

    # Sanity check at start of each game
    if not self.npc_players.union(self.human_players) == set(self.players):
        raise ValueError("Inconsistent State")

    self.curr_layout = self.layouts.pop()
    # Store the mdp on self so the later MotionPlanner and potential_function
    # calls (which reference self.mdp) see the same instance
    self.mdp = OvercookedGridworld.from_layout_name(self.curr_layout, **self.mdp_params)
    self.env = OvercookedEnv.from_mdp(self.mdp)
    if self.show_potential:
        self.mp = MotionPlanner.from_pickle_or_compute(
            self.mdp, counter_goals=NO_COUNTERS_PARAMS)
        self.phi = self.mdp.potential_function(self.state, self.mp, gamma=0.99)

    self.start_time = time()
    self.curr_tick = 0
    self.score = 0
    self.threads = []
    for npc_policy in self.npc_policies:
        self.npc_policies[npc_policy].reset()
        self.npc_state_queues[npc_policy].put(self.state)
        t = Thread(target=self.npc_policy_consumer, args=(npc_policy,))
        self.threads.append(t)
        t.start()
def __init__(self, mdp_params, env_params={}, mdp_fn_params=None,
             force_compute=False, mlp_params=NO_COUNTERS_PARAMS, debug=False):
    """
    mdp_params (dict): params for creating an OvercookedGridworld instance
        through the `from_layout_name` method
    env_params (dict): params for creating an OvercookedEnv
    mdp_fn_params (dict): params to set up random MDP generation
    force_compute (bool): whether to recompute the MediumLevelPlanner even
        if a matching file is found
    mlp_params (dict): params for the MediumLevelPlanner
    """
    assert type(mdp_params) is dict, "mdp_params must be a dictionary"

    if mdp_fn_params is None:
        mdp = OvercookedGridworld.from_layout_name(**mdp_params)
        self.mdp_fn = lambda: mdp
        self.env = OvercookedEnv.from_mdp(mdp, **env_params)
    else:
        self.mdp_fn = LayoutGenerator.mdp_gen_fn_from_dict(
            mdp_params, **mdp_fn_params)
        self.env = OvercookedEnv(self.mdp_fn, **env_params)

    self.force_compute = force_compute
    self.debug = debug
    self.mlp_params = mlp_params
    self._mlp = None
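# A minimal usage sketch for the fixed-layout path of the constructor above.
# Assumption: this __init__ belongs to the AgentEvaluator class (the snippet
# itself does not name the class), so the name below is illustrative only.
evaluator = AgentEvaluator(
    mdp_params={"layout_name": "cramped_room"},
    env_params={"horizon": 400},
)
env = evaluator.env  # the OvercookedEnv constructed from the fixed mdp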
def test_scenario_1_s(self):
    # Smaller version of the corridor collisions scenario above
    # to facilitate DRL training
    scenario_1_mdp = OvercookedGridworld.from_layout_name(
        'scenario1_s', start_order_list=['any'], cook_time=5)
    mlp = MediumLevelPlanner.from_pickle_or_compute(
        scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
    a0 = GreedyHumanModel(mlp)
    a1 = CoupledPlanningAgent(mlp)
    agent_pair = AgentPair(a0, a1)
    start_state = OvercookedState(
        [P((2, 1), s, Obj('onion', (2, 1))), P((4, 2), s)],
        {}, order_list=['onion'])
    env = OvercookedEnv.from_mdp(scenario_1_mdp, start_state_fn=lambda: start_state)
    trajectory, time_taken_hr, _, _ = env.run_agents(
        agent_pair, include_final_state=True, display=DISPLAY)
    env.reset()

    print("\n" * 5)
    print("-" * 50)

    a0 = CoupledPlanningAgent(mlp)
    a1 = CoupledPlanningAgent(mlp)
    agent_pair = AgentPair(a0, a1)
    trajectory, time_taken_rr, _, _ = env.run_agents(
        agent_pair, include_final_state=True, display=DISPLAY)

    print("H+R time taken: ", time_taken_hr)
    print("R+R time taken: ", time_taken_rr)
    self.assertGreater(time_taken_hr, time_taken_rr)
def check_single_motion_plan(self, motion_planner, start_pos_and_or,
                             goal_pos_and_or, expected_length=None):
    dummy_agent = P((3, 2), n)
    start_state = OvercookedState(
        [P(*start_pos_and_or), dummy_agent], {},
        all_orders=simple_mdp.start_all_orders)
    action_plan, pos_and_or_plan, plan_cost = motion_planner.get_plan(
        start_pos_and_or, goal_pos_and_or)

    # Checking that last state obtained matches goal position
    self.assertEqual(pos_and_or_plan[-1], goal_pos_and_or)

    # In single motion plans the graph cost should be equal to
    # the plan cost (= plan length) as agents should never STAY
    graph_plan_cost = sum(
        [motion_planner._graph_action_cost(a) for a in action_plan])
    self.assertEqual(plan_cost, graph_plan_cost)

    joint_action_plan = [(a, stay) for a in action_plan]
    env = OvercookedEnv.from_mdp(motion_planner.mdp, horizon=1000)
    resulting_state, _ = env.execute_plan(start_state, joint_action_plan)
    self.assertEqual(resulting_state.players_pos_and_or[0], goal_pos_and_or)

    if expected_length is not None:
        self.assertEqual(len(action_plan), expected_length)
def check_joint_plan(self, joint_motion_planner, start, goal,
                     times=None, min_t=None, display=False):
    """Runs the plan in the environment and checks that the intended goals are achieved."""
    debug = False
    action_plan, end_pos_and_orients, plan_lengths = \
        joint_motion_planner.get_low_level_action_plan(start, goal)
    if debug:
        print("Start state: {}, Goal state: {}, Action plan: {}".format(
            start, goal, action_plan))

    start_state = OvercookedState(
        [P(*start[0]), P(*start[1])], {},
        all_orders=simple_mdp.start_all_orders)
    env = OvercookedEnv.from_mdp(joint_motion_planner.mdp, horizon=1000)
    resulting_state, _ = env.execute_plan(start_state, action_plan, display=display)

    self.assertTrue(any([
        agent_goal in resulting_state.players_pos_and_or
        for agent_goal in goal
    ]))
    self.assertEqual(resulting_state.players_pos_and_or, end_pos_and_orients)
    self.assertEqual(len(action_plan), min(plan_lengths))

    if min_t is not None:
        self.assertEqual(len(action_plan), min_t)
    if times is not None:
        self.assertEqual(plan_lengths, times)
def test_slowed_down_agent(self):
    def should_stop(step_num, stop_every_n_steps):
        # currently SlowedDownAgent always stops at the 2nd step
        return not bool((step_num - 1) % stop_every_n_steps)

    horizon = 100
    # NOTE: if stop_every_n_steps is 3 this would not work because of rounding error
    # (ok for practical purposes, will just skip a turn later but would fail the test below)
    for stop_every_n_steps in [2, 4]:
        slowdown_rate = 1 - 1 / stop_every_n_steps
        agent_pair = AgentPair(
            SlowedDownAgent(RandomAgent(), slowdown_rate),
            SlowedDownAgent(RandomAgent(), slowdown_rate)
        )
        skip_action_probs = SlowedDownAgent(
            RandomAgent()).skip_action[1]["action_probs"].tolist()
        env = OvercookedEnv.from_mdp(large_mdp, horizon=horizon)
        trajectory, time_taken, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)

        for i, traj_step in enumerate(trajectory):
            (s_t, a_t, r_t, done, info) = traj_step
            if not done:
                agent_0_probs = info["agent_infos"][0]["action_probs"]
                agent_1_probs = info["agent_infos"][1]["action_probs"]
                if should_stop(i, stop_every_n_steps):
                    self.assertEqual(agent_0_probs.tolist(), skip_action_probs)
                    self.assertEqual(agent_1_probs.tolist(), skip_action_probs)
                else:
                    self.assertNotEqual(agent_0_probs.tolist(), skip_action_probs)
                    self.assertNotEqual(agent_1_probs.tolist(), skip_action_probs)
def test_scenario_1(self):
    # Myopic corridor collision
    #
    # X X X X X O X D X X X X X
    # X   ↓Ho                 X
    # X X X X X X X X     ↓R  X
    # X                       X
    # X S X X X X X X X X P P X
    #
    # H on left with onion, further away to the tunnel entrance than R.
    # Optimal planner tells R to go first and that H will wait
    # for R to pass. H however, starts going through the tunnel
    # and they get stuck. The H plan is a bit extreme (it would probably
    # realize that it should retrace its steps at some point)
    scenario_1_mdp = OvercookedGridworld.from_layout_name(
        'small_corridor', start_order_list=['any'], cook_time=5)
    mlp = MediumLevelPlanner.from_pickle_or_compute(
        scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
    a0 = GreedyHumanModel(mlp)
    a1 = CoupledPlanningAgent(mlp)
    agent_pair = AgentPair(a0, a1)
    start_state = OvercookedState(
        [P((2, 1), s, Obj('onion', (2, 1))), P((10, 2), s)],
        {}, order_list=['onion'])
    env = OvercookedEnv.from_mdp(scenario_1_mdp, start_state_fn=lambda: start_state)
    env.run_agents(agent_pair, include_final_state=True, display=DISPLAY)
def test_one_player_env(self):
    mdp = OvercookedGridworld.from_layout_name("cramped_room_single")
    env = OvercookedEnv.from_mdp(mdp, horizon=12)
    a0 = FixedPlanAgent([stay, w, w, e, e, n, e, interact, w, n, interact])
    ag = AgentGroup(a0)
    env.run_agents(ag, display=False)
    self.assertEqual(env.state.players_pos_and_or, (((2, 1), (0, -1)),))
def test_transitions_and_environment(self):
    bad_state = OvercookedState(
        [P((0, 0), s), P((3, 1), s)], {}, order_list=[])

    with self.assertRaises(AssertionError):
        self.base_mdp.get_state_transition(bad_state, stay)

    env = OvercookedEnv.from_mdp(self.base_mdp)
    env.state.order_list = ['onion', 'any']

    def check_transition(action, expected_state, expected_reward=0):
        state = env.state
        pred_state, sparse_reward, dense_reward, _ = \
            self.base_mdp.get_state_transition(state, action)
        self.assertEqual(
            pred_state, expected_state,
            '\n' + str(pred_state) + '\n' + str(expected_state))
        new_state, sparse_reward, _, _ = env.step(action)
        self.assertEqual(new_state, expected_state)
        self.assertEqual(sparse_reward, expected_reward)

    check_transition(
        [n, e],
        OvercookedState([P((1, 1), n), P((3, 1), e)], {},
                        order_list=['onion', 'any']))
def test_get_encoding_function(self):
    mdp = OvercookedGridworld.from_layout_name("cramped_room")
    mdp_params = mdp.mdp_params
    env_params = {"horizon": 100}
    env = OvercookedEnv.from_mdp(mdp, **env_params)
    state = mdp.get_standard_start_state()

    example_encoding_fns_names = [
        "mdp.multi_hot_orders_encoding",
        "env.featurize_state_mdp",
        "env.lossless_state_encoding_mdp",
    ]
    example_encoding_fns = [
        mdp.multi_hot_orders_encoding,
        env.featurize_state_mdp,
        env.lossless_state_encoding_mdp,
    ]

    for encoding_fn_name, encoding_fn in zip(example_encoding_fns_names,
                                             example_encoding_fns):
        encoding_fn_from_name = get_encoding_function(encoding_fn_name, env=env)
        self.assertEqual(encoding_fn_from_name, encoding_fn)
        if encoding_fn_name.split(".")[0] == "mdp":
            encoding_fn_from_name = get_encoding_function(encoding_fn_name, mdp=mdp)
            self.assertEqual(encoding_fn_from_name, encoding_fn)
            encoding_fn_from_name = get_encoding_function(
                encoding_fn_name, mdp_params=mdp_params)
            # compare names as a new instance of the mdp is created
            self.assertEqual(encoding_fn_from_name.__name__, encoding_fn.__name__)
        else:
            encoding_fn_from_name = get_encoding_function(
                encoding_fn_name, env_params=env_params, mdp_params=mdp_params)
            # compare names as a new instance of the env is created
            self.assertEqual(encoding_fn_from_name.__name__, encoding_fn.__name__)

    expected_encoded_state_dict = {
        str(i): fn(state) for i, fn in enumerate(example_encoding_fns)}
    actual_encoded_state_dict = get_encoding_function(
        {str(i): fn_name for i, fn_name in enumerate(example_encoding_fns_names)},
        env=env)(state)
    self.assertEqual(expected_encoded_state_dict.keys(),
                     actual_encoded_state_dict.keys())
    for k in expected_encoded_state_dict.keys():
        self.assertTrue(np.array_equal(expected_encoded_state_dict[k],
                                       actual_encoded_state_dict[k]))
def setUp(self):
    self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
    self.mlam = MediumLevelActionManager.from_pickle_or_compute(
        self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True)
    self.env = OvercookedEnv.from_mdp(self.base_mdp, **DEFAULT_ENV_PARAMS)
    self.rnd_agent_pair = AgentPair(
        GreedyHumanModel(self.mlam), GreedyHumanModel(self.mlam))
    np.random.seed(0)
def test_one_coupled_one_fixed(self):
    a0 = CoupledPlanningAgent(self.mlp_large)
    a1 = FixedPlanAgent([s, e, n, w])
    agent_pair = AgentPair(a0, a1)
    env = OvercookedEnv.from_mdp(large_mdp, horizon=10)
    trajectory, time_taken, _, _ = env.run_agents(
        agent_pair, include_final_state=True, display=DISPLAY)
    self.assertEqual(time_taken, 10)
def test_starting_obj_randomization(self):
    self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
    start_state_fn = self.base_mdp.get_random_start_state_fn(
        random_start_pos=False, rnd_obj_prob_thresh=0.8)
    env = OvercookedEnv.from_mdp(self.base_mdp, start_state_fn)
    start_objects = env.state.all_objects_list
    for _ in range(3):
        env.reset()
        curr_objects = env.state.all_objects_list
        self.assertFalse(np.array_equal(start_objects, curr_objects))
def test_fixed_plan_agents(self):
    a0 = FixedPlanAgent([s, e, n, w])
    a1 = FixedPlanAgent([s, w, n, e])
    agent_pair = AgentPair(a0, a1)
    env = OvercookedEnv.from_mdp(large_mdp, horizon=10)
    trajectory, time_taken, _, _ = env.run_agents(
        agent_pair, include_final_state=True, display=DISPLAY)
    end_state = trajectory[-1][0]
    self.assertEqual(time_taken, 10)
    self.assertEqual(env.mdp.get_standard_start_state().player_positions,
                     end_state.player_positions)
def get_mdps_and_envs_from_trajectories(trajectories):
    mdps, envs = [], []
    for idx in range(len(trajectories["ep_lengths"])):
        mdp_params = copy.deepcopy(trajectories["mdp_params"][idx])
        env_params = copy.deepcopy(trajectories["env_params"][idx])
        mdp = OvercookedGridworld.from_layout_name(**mdp_params)
        env = OvercookedEnv.from_mdp(mdp, **env_params)
        mdps.append(mdp)
        envs.append(env)
    return mdps, envs
def test_starting_position_randomization(self):
    self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
    start_state_fn = self.base_mdp.get_random_start_state_fn(
        random_start_pos=True, rnd_obj_prob_thresh=0.0)
    env = OvercookedEnv.from_mdp(self.base_mdp, start_state_fn)
    start_positions = env.state.players_pos_and_or
    for _ in range(3):
        env.reset()
        print(env)
        curr_positions = env.state.players_pos_and_or
        self.assertFalse(np.array_equal(start_positions, curr_positions))
def test_four_player_env_fixed(self):
    mdp = OvercookedGridworld.from_layout_name("multiplayer_schelling")
    assert mdp.num_players == 4
    env = OvercookedEnv.from_mdp(mdp, horizon=16)
    a0 = FixedPlanAgent([stay, w, w])
    a1 = FixedPlanAgent([stay, stay, e, e, n, n, n, e, interact,
                         n, n, w, w, w, n, interact, e])
    a2 = FixedPlanAgent([stay, w, interact, n, n, e, e, e, n, e, n, interact, w])
    a3 = FixedPlanAgent([e, interact, n, n, w, w, w, n, interact, e, s])
    ag = AgentGroup(a0, a1, a2, a3)
    env.run_agents(ag, display=False)
    self.assertEqual(
        env.state.players_pos_and_or,
        (((1, 1), (-1, 0)),
         ((3, 1), (0, -1)),
         ((2, 1), (-1, 0)),
         ((4, 2), (0, 1)))
    )
def test_two_coupled_agents(self):
    a0 = CoupledPlanningAgent(self.mlp_large)
    a1 = CoupledPlanningAgent(self.mlp_large)
    agent_pair = AgentPair(a0, a1)
    start_state = OvercookedState(
        [P((2, 2), n), P((2, 1), n)], {}, order_list=['any'])
    env = OvercookedEnv.from_mdp(large_mdp, start_state_fn=lambda: start_state)
    trajectory, time_taken, _, _ = env.run_agents(
        agent_pair, include_final_state=True, display=DISPLAY)
    end_state = trajectory[-1][0]
    self.assertEqual(end_state.order_list, [])
def get_overcooked_obj_attr(attr, env=None, mdp=None, env_params=None, mdp_params=None):
    """
    Returns an Overcooked object attribute based on its name; used mostly to get
    state processing (encoding) functions and gym spaces.

    When it receives a string, it parses it to get the attribute; the format is
    "env"/"mdp" + "." + method name, e.g. "env.lossless_state_encoding_mdp".
    Dicts are also supported: strings in the values are replaced with object
    attributes. When it receives a method/function, it returns it unchanged;
    this obviously does not work that way if attr is a str/dict.
    """
    attr_type = type(attr)
    if attr_type is str:
        name = attr
        [obj_name, attr_name] = name.split(".")
        if obj_name == "mdp":
            if not mdp:
                if env:
                    mdp = env.mdp
                else:
                    mdp = OvercookedGridworld(**mdp_params)
            attr = getattr(mdp, attr_name)
        elif obj_name == "env":
            if not env:
                if not mdp:
                    mdp = OvercookedGridworld(**mdp_params)
                env_params = only_valid_named_args(env_params, OvercookedEnv.from_mdp)
                env = OvercookedEnv.from_mdp(mdp, **env_params)
            attr = getattr(env, attr_name)
        # not tested or used anywhere yet
        # elif obj_name in kwargs:
        #     attr = getattr(kwargs[obj_name], attr_name)
        else:
            raise ValueError("Unsupported obj attr string " + name)
    elif attr_type is dict:
        attr = {
            k: get_overcooked_obj_attr(v, env=env, mdp=mdp,
                                       env_params=env_params,
                                       mdp_params=mdp_params)
            for k, v in attr.items()
        }
    # not tested or used anywhere yet
    # elif attr_type in [list, tuple]:
    #     attr = attr_type(get_overcooked_obj_attr(elem, env=env, mdp=mdp,
    #                                              env_params=env_params,
    #                                              mdp_params=mdp_params)
    #                      for elem in attr)
    return attr
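# A brief usage sketch of the two lookup formats the docstring above describes.
# The encoding-function names are borrowed from the tests elsewhere in this
# section; the surrounding setup is illustrative, not part of the original source.
mdp = OvercookedGridworld.from_layout_name("cramped_room")
env = OvercookedEnv.from_mdp(mdp, horizon=100)

# String form: "env"/"mdp" + "." + attribute name, resolved against a live env
encode = get_overcooked_obj_attr("env.lossless_state_encoding_mdp", env=env)

# Dict form: keys are kept, string values are replaced with resolved attributes
encoding_fns = get_overcooked_obj_attr(
    {"orders": "mdp.multi_hot_orders_encoding",
     "features": "env.featurize_state_mdp"},
    env=env)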
def test_two_coupled_agents_coupled_pair(self):
    mlp_simple = MediumLevelPlanner.from_pickle_or_compute(
        simple_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
    cp_agent = CoupledPlanningAgent(mlp_simple)
    agent_pair = CoupledPlanningPair(cp_agent)
    start_state = OvercookedState(
        [P((2, 2), n), P((2, 1), n)], {}, order_list=['any'])
    env = OvercookedEnv.from_mdp(simple_mdp, start_state_fn=lambda: start_state)
    trajectory, time_taken, _, _ = env.run_agents(
        agent_pair, include_final_state=True, display=DISPLAY)
    end_state = trajectory[-1][0]
    self.assertEqual(end_state.order_list, [])
def test_two_greedy_human_open_map(self):
    scenario_2_mdp = OvercookedGridworld.from_layout_name('scenario2')
    mlam = MediumLevelActionManager.from_pickle_or_compute(
        scenario_2_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
    a0 = GreedyHumanModel(mlam)
    a1 = GreedyHumanModel(mlam)
    agent_pair = AgentPair(a0, a1)
    start_state = OvercookedState(
        [P((8, 1), s), P((1, 1), s)], {},
        all_orders=scenario_2_mdp.start_all_orders)
    env = OvercookedEnv.from_mdp(
        scenario_2_mdp, start_state_fn=lambda: start_state, horizon=100)
    trajectory, time_taken, _, _ = env.run_agents(
        agent_pair, include_final_state=True, display=DISPLAY)
def test_one_coupled_one_greedy_human(self):
    # Even though in the first ~10 timesteps it seems like agent 1 is wasting time,
    # it turns out that this is actually not suboptimal as the true bottleneck is
    # going to be agent 0 later on (when it goes to get the 3rd onion)
    a0 = GreedyHumanModel(self.mlp_large)
    a1 = CoupledPlanningAgent(self.mlp_large)
    agent_pair = AgentPair(a0, a1)
    start_state = OvercookedState(
        [P((2, 1), s), P((1, 1), s)], {}, order_list=['onion'])
    env = OvercookedEnv.from_mdp(large_mdp, start_state_fn=lambda: start_state)
    trajectory, time_taken, _, _ = env.run_agents(
        agent_pair, include_final_state=True, display=DISPLAY)
    end_state = trajectory[-1][0]
    self.assertEqual(end_state.order_list, [])
def test_agents_on_open_map(self):
    scenario_2_mdp = OvercookedGridworld.from_layout_name('scenario2')
    mlam = MediumLevelActionManager.from_pickle_or_compute(
        scenario_2_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
    agent_pairs = [
        AgentPair(GreedyHumanModel(mlam), GreedyHumanModel(mlam)),
        AgentPair(SimpleGreedyHumanModel(mlam), SimpleGreedyHumanModel(mlam)),
        AgentPair(RandomAgent(all_actions=True), RandomAgent(all_actions=True)),
        AgentPair(RandomAgent(all_actions=False), RandomAgent(all_actions=False)),
    ]
    start_state = OvercookedState(
        [P((8, 1), s), P((1, 1), s)], {},
        all_orders=scenario_2_mdp.start_all_orders)
    for agent_pair in agent_pairs:
        env = OvercookedEnv.from_mdp(
            scenario_2_mdp, start_state_fn=lambda: start_state, horizon=100)
        trajectory, time_taken, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)
def test_two_greedy_human_open_map(self):
    scenario_2_mdp = OvercookedGridworld.from_layout_name(
        'scenario2', start_order_list=['any'], cook_time=5)
    mlp = MediumLevelPlanner.from_pickle_or_compute(
        scenario_2_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
    a0 = GreedyHumanModel(mlp)
    a1 = GreedyHumanModel(mlp)
    agent_pair = AgentPair(a0, a1)
    start_state = OvercookedState(
        [P((8, 1), s), P((1, 1), s)], {}, order_list=['onion'])
    env = OvercookedEnv.from_mdp(
        scenario_2_mdp, start_state_fn=lambda: start_state, horizon=100)
    trajectory, time_taken, _, _ = env.run_agents(
        agent_pair, include_final_state=True, display=DISPLAY)
    end_state = trajectory[-1][0]
    self.assertEqual(len(end_state.order_list), 0)
def test_transitions_and_environment(self):
    bad_state = OvercookedState([P((0, 0), s), P((3, 1), s)], {})

    with self.assertRaises(AssertionError):
        self.base_mdp.get_state_transition(bad_state, stay)

    env = OvercookedEnv.from_mdp(self.base_mdp)

    def check_transition(action, expected_path, recompute=False):
        # Compute actual values
        state = env.state
        pred_state, _ = self.base_mdp.get_state_transition(state, action)
        new_state, sparse_reward, _, _ = env.step(action)
        self.assertEqual(pred_state, new_state,
                         '\n' + str(pred_state) + '\n' + str(new_state))

        # Recompute expected values if desired
        if recompute:
            actual = {
                "state": pred_state.to_dict(),
                "reward": sparse_reward
            }
            save_as_json(actual, expected_path)

        # Compute expected values
        expected = load_from_json(expected_path)
        expected_state = OvercookedState.from_dict(expected['state'])
        expected_reward = expected['reward']

        # Make sure everything lines up (note __eq__ is transitive)
        self.assertTrue(
            pred_state.time_independent_equal(expected_state),
            '\n' + str(pred_state) + '\n' + str(expected_state))
        self.assertEqual(sparse_reward, expected_reward)

    expected_path = os.path.join(
        TESTING_DATA_DIR, "test_transitions_and_environments", "expected.json")

    # NOTE: set 'recompute=True' if deliberately updating state dynamics
    check_transition([n, e], expected_path, recompute=False)
def setUp(self):
    if not os.path.exists(self.temp_dir):
        os.makedirs(self.temp_dir)
    self.base_mdp = OvercookedGridworld.from_layout_name(self.layout_name)
    self.mlam = MediumLevelActionManager.from_pickle_or_compute(
        self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True, info=False)
    self.env = OvercookedEnv.from_mdp(
        self.base_mdp, horizon=self.horizon, info_level=0)
    self.starting_state_dict = self.base_mdp.get_standard_start_state().to_dict()

    outfile = process_human_trials_main(
        self.infile, self.temp_dir, insert_interacts=True,
        verbose=False, forward_port=False, fix_json=False)
    with open(outfile, 'rb') as f:
        self.human_data = pickle.load(f)[self.layout_name]
def test_get_gym_space(self):
    mdp = OvercookedGridworld.from_layout_name("cramped_room")
    mdp_params = mdp.mdp_params
    env_params = {"horizon": 100}
    env = OvercookedEnv.from_mdp(mdp, **env_params)

    example_gym_space_names = [
        "mdp.multi_hot_orders_encoding_gym_space",
        "mdp.featurize_state_gym_space",
        "mdp.lossless_state_encoding_gym_space",
    ]
    example_gym_spaces = [
        mdp.multi_hot_orders_encoding_gym_space,
        mdp.featurize_state_gym_space,
        mdp.lossless_state_encoding_gym_space,
    ]

    for space_name, space in zip(example_gym_space_names, example_gym_spaces):
        space_from_name = get_gym_space(space_name, env=env)
        self.assertEqual(space_from_name, space)
        if space_name.split(".")[0] == "mdp":
            space_from_name = get_gym_space(space_name, mdp=mdp)
            self.assertEqual(space_from_name, space)
            space_from_name = get_gym_space(space_name, mdp_params=mdp_params)
            self.assertEqual(space_from_name, space)
        else:
            space_from_name = get_gym_space(
                space_name, env_params=env_params, mdp_params=mdp_params)
            self.assertEqual(space_from_name, space)

    expected_space = gym.spaces.Dict(
        {str(i): space for i, space in enumerate(example_gym_spaces)})
    actual_space = get_gym_space(
        {str(i): space_name for i, space_name in enumerate(example_gym_space_names)},
        env=env)
    self.assertEqual(expected_space, actual_space)
import gym
import numpy as np

from stable_baselines3 import HER, DDPG, DQN, SAC, TD3
from stable_baselines3.her.goal_selection_strategy import GoalSelectionStrategy
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env.obs_dict_wrapper import ObsDictWrapper
from stable_baselines3.common.monitor import Monitor
import stable_baselines3.common.env_checker
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

model_class = TD3  # works also with SAC, DDPG and TD3

mdp = OvercookedGridworld.from_layout_name("cramped_room_single")
base_env = OvercookedEnv.from_mdp(mdp, horizon=1e4)
env = gym.make('Overcooked-single-v0')
env.custom_init(base_env, base_env.lossless_state_encoding_mdp_single)
env = Monitor(env, "./her_overcooked/", allow_early_resets=True)

# Available strategies (cf paper): future, final, episode
goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE

# If True the HER transitions will get sampled online
online_sampling = True
# Time limit for the episodes
max_episode_length = 50

action_noise = NormalActionNoise(mean=np.zeros(1), sigma=0.3 * np.ones(1))
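# The snippet above stops before the model is built. A minimal continuation
# sketch, assuming the SB3 v1.0-era HER wrapper API that the imports above
# imply (in later stable-baselines3 releases, HER became a replay-buffer
# class with a different interface, so this would not run there):
model = HER('MlpPolicy', env, model_class,
            n_sampled_goal=4,
            goal_selection_strategy=goal_selection_strategy,
            online_sampling=online_sampling,
            max_episode_length=max_episode_length,
            action_noise=action_noise,
            verbose=1)
model.learn(total_timesteps=int(1e5))  # training budget is an arbitrary example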
def setUp(self):
    self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
    self.env = OvercookedEnv.from_mdp(self.base_mdp, **DEFAULT_ENV_PARAMS)
    self.rnd_agent_pair = AgentPair(FixedPlanAgent([]), FixedPlanAgent([]))
    np.random.seed(0)