import gym
import pytest
from gym import spaces

# Import path assumes the standard rware package layout.
from rware.warehouse import Warehouse, Direction, Action, RewardType


def test_grid_size():
    env = Warehouse(
        shelf_columns=1,
        column_height=3,
        shelf_rows=3,
        n_agents=1,
        msg_bits=0,
        sensor_range=1,
        request_queue_size=5,
        max_inactivity_steps=None,
        max_steps=None,
        reward_type=RewardType.GLOBAL,
    )
    assert env.grid_size == (14, 4)
    env = Warehouse(
        shelf_columns=3,
        column_height=3,
        shelf_rows=3,
        n_agents=1,
        msg_bits=0,
        sensor_range=1,
        request_queue_size=5,
        max_inactivity_steps=None,
        max_steps=None,
        reward_type=RewardType.GLOBAL,
    )
    assert env.grid_size == (14, 10)

def test_goal_4(env_0: Warehouse):
    assert env_0.request_queue[0] == env_0.shelfs[0]
    # Two left turns face the agent away from the goal; moving forward then
    # ends one cell up, so the requested shelf is never delivered.
    _, rewards, _, _ = env_0.step([Action.LEFT])
    assert rewards[0] == pytest.approx(0.0)
    _, rewards, _, _ = env_0.step([Action.LEFT])
    assert rewards[0] == pytest.approx(0.0)
    _, rewards, _, _ = env_0.step([Action.FORWARD])
    assert env_0.agents[0].x == 4
    assert env_0.agents[0].y == 26
    assert env_0.request_queue[0] == env_0.shelfs[0]
    assert rewards[0] == pytest.approx(0.0)

def test_obs_space_2():
    env = Warehouse(
        shelf_columns=1,
        column_height=3,
        shelf_rows=3,
        n_agents=10,
        msg_bits=5,
        sensor_range=1,
        request_queue_size=5,
        max_inactivity_steps=None,
        max_steps=None,
        reward_type=RewardType.GLOBAL,
    )
    obs = env.reset()
    for s, o in zip(env.observation_space, obs):
        assert len(gym.spaces.flatten(s, o)) == env._obs_length

def test_obs_space_1():
    env = Warehouse(
        shelf_columns=1,
        column_height=3,
        shelf_rows=3,
        n_agents=10,
        msg_bits=5,
        sensor_range=1,
        request_queue_size=5,
        max_inactivity_steps=None,
        max_steps=None,
        reward_type=RewardType.GLOBAL,
    )
    obs = env.reset()
    for _ in range(200):
        obs, _, _, _ = env.step(env.action_space.sample())
        assert env.observation_space.contains(obs)

def test_goal_2(env_1: Warehouse):
    assert env_1.request_queue[0] == env_1.shelfs[0]
    # Agent 0 steps forward onto the goal cell while carrying the requested
    # shelf; under RewardType.GLOBAL both agents receive the reward.
    # (The env_1 fixture is sketched below, next to env_0.)
    _, rewards, _, _ = env_1.step([Action.FORWARD, Action.NOOP])
    assert env_1.agents[0].x == 4
    assert env_1.agents[0].y == 28
    assert env_1.request_queue[0] != env_1.shelfs[0]
    assert rewards[0] == pytest.approx(1.0)
    assert rewards[1] == pytest.approx(1.0)

# The test takes time_limit as a parameter, so it needs a parametrization;
# the values below are assumed (any positive int works).
@pytest.mark.parametrize("time_limit", [1, 100, 200])
def test_time_limit(time_limit):
    env = Warehouse(
        shelf_columns=1,
        column_height=3,
        shelf_rows=3,
        n_agents=10,
        msg_bits=5,
        sensor_range=1,
        request_queue_size=5,
        max_inactivity_steps=None,
        max_steps=time_limit,
        reward_type=RewardType.GLOBAL,
    )
    _ = env.reset()
    for _ in range(time_limit - 1):
        _, _, done, _ = env.step(env.action_space.sample())
        assert done == 10 * [False]
    _, _, done, _ = env.step(env.action_space.sample())
    assert done == 10 * [True]

@pytest.fixture
def env_0():
    env = Warehouse(3, 8, 3, 1, 0, 1, 5, 10, None, RewardType.GLOBAL)
    env.reset()

    env.agents[0].x = 4  # should place it in the middle (empty space)
    env.agents[0].y = 27
    env.agents[0].dir = Direction.DOWN
    env.shelfs[0].x = 4
    env.shelfs[0].y = 27

    env.agents[0].carrying_shelf = env.shelfs[0]
    env.request_queue[0] = env.shelfs[0]
    env._recalc_grid()
    return env
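
# test_goal_2 requests an "env_1" fixture that is not defined in this file.
# The sketch below is inferred from the assertions in test_goal_2: two agents,
# agent 0 one cell above the goal at (4, 28) and carrying the requested shelf,
# agent 1 idle. Agent 1's starting position is an assumption.
@pytest.fixture
def env_1():
    env = Warehouse(3, 8, 3, 2, 0, 1, 5, 10, None, RewardType.GLOBAL)
    env.reset()

    env.agents[0].x = 4  # one step above the goal cell at (4, 28)
    env.agents[0].y = 27
    env.agents[0].dir = Direction.DOWN
    env.shelfs[0].x = 4
    env.shelfs[0].y = 27
    env.agents[0].carrying_shelf = env.shelfs[0]

    env.agents[1].x = 0  # assumed out-of-the-way position for the idle agent
    env.agents[1].y = 0
    env.agents[1].dir = Direction.DOWN

    env.request_queue[0] = env.shelfs[0]
    env._recalc_grid()
    return env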

def test_action_space_1():
    env = Warehouse(
        shelf_columns=1,
        column_height=3,
        shelf_rows=3,
        n_agents=2,
        msg_bits=1,
        sensor_range=1,
        request_queue_size=5,
        max_inactivity_steps=None,
        max_steps=None,
        reward_type=RewardType.GLOBAL,
    )
    env.reset()
    assert env.action_space == spaces.Tuple(
        2 * (spaces.MultiDiscrete([len(Action), 2]),)
    )
    env.step(env.action_space.sample())

@pytest.fixture
def env_double_agent_with_msg():
    env = Warehouse(3, 8, 3, 2, 2, 1, 5, None, None, RewardType.GLOBAL)
    env.reset()
    return env

@pytest.fixture
def env_single_agent():
    env = Warehouse(3, 8, 3, 1, 0, 1, 5, None, None, RewardType.GLOBAL)
    env.reset()
    return env

def test_fast_obs_2():
    env = Warehouse(3, 8, 3, 3, 2, 1, 5, 10, None, RewardType.GLOBAL, fast_obs=False)
    env.reset()

    slow_obs_space = env.observation_space

    for _ in range(10):
        slow_obs = [env._make_obs(agent) for agent in env.agents]
        env._use_fast_obs()
        fast_obs = [env._make_obs(agent) for agent in env.agents]
        assert len(fast_obs) == 3
        assert len(slow_obs) == 3

        flattened_slow = [
            spaces.flatten(osp, obs)
            for osp, obs in zip(slow_obs_space, slow_obs)
        ]

        # The fast observation must match the flattened slow observation
        # element for element.
        for i in range(len(fast_obs)):
            assert list(fast_obs[i]) == list(flattened_slow[i])

        env._use_slow_obs()
        env.step(env.action_space.sample())

@pytest.fixture
def env_five_agents():
    env = Warehouse(3, 8, 3, 5, 0, 1, 5, None, None, RewardType.GLOBAL)
    env.reset()
    return env