def test_corridor_switch_no_clash_possible(solver_describer: SolverDescriber):
    grid = MapfGrid(['...',
                     '@.@'])
    agents_starts = ((0, 0), (0, 2))
    agents_goals = ((0, 2), (0, 0))

    # These parameters are for making sure that the solver avoids collision regardless of reward efficiency
    env = MapfEnv(grid, 2, agents_starts, agents_goals, 0.1, 0.1, -0.001, 0, -1)

    info = {}
    policy = solver_describer.func(env, info)

    # Assert no conflict is possible
    interesting_state = env.locations_to_state(((1, 1), (0, 1)))
    expected_possible_actions = [vector_action_to_integer((STAY, UP)),
                                 vector_action_to_integer((DOWN, UP))]
    assert policy.act(interesting_state) in expected_possible_actions

    # Check the policy performance
    reward, clashed, _ = evaluate_policy(policy, 100, 100)

    # Make sure no clash happened
    assert not clashed

    # Assert the reward is reasonable
    assert reward >= 100.0 * env.reward_of_living
def test_copy_mapf_env(self):
    grid = MapfGrid(['....',
                     '....',
                     '....',
                     '....',
                     '....'])
    env = MapfEnv(grid, 1, ((0, 0),), ((4, 0),), 0, REWARD_OF_CLASH,
                  REWARD_OF_GOAL, REWARD_OF_LIVING, OptimizationCriteria.Makespan)

    env.step(vector_action_to_integer((RIGHT,)))

    env_copy = copy(env)
    env_copy.step(vector_action_to_integer((RIGHT,)))
def test_reward_multiagent_soc_stay_actions(self):
    grid = MapfGrid(['....',
                     '....',
                     '....',
                     '....'])
    start_locations = ((0, 0), (3, 3), (1, 1))
    goal_locations = ((0, 1), (1, 3), (1, 2))
    deterministic_env = MapfEnv(grid, 3, start_locations, goal_locations, 0,
                                REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                                OptimizationCriteria.SoC)

    right_stay_stay = vector_action_to_integer((RIGHT, STAY, STAY))
    s, r, done, _ = deterministic_env.step(right_stay_stay)

    # Under SoC, each of the three agents pays the living reward for this step
    self.assertEqual(r, -3)
def test_reward_single_agent_makespan(self):
    grid = MapfGrid(['....',
                     '....',
                     '....',
                     '....',
                     '....'])
    start_locations = ((0, 0),)
    goal_locations = ((4, 0),)
    deterministic_env = MapfEnv(grid, 1, start_locations, goal_locations, 0,
                                REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                                OptimizationCriteria.Makespan)

    total_reward = 0
    down_action = vector_action_to_integer((DOWN,))

    # It takes exactly four DOWN steps to reach the goal at (4, 0)
    _, r, _, _ = deterministic_env.step(down_action)
    total_reward += r
    _, r, _, _ = deterministic_env.step(down_action)
    total_reward += r
    _, r, _, _ = deterministic_env.step(down_action)
    total_reward += r
    s, r, done, _ = deterministic_env.step(down_action)
    total_reward += r

    self.assertEqual(s, deterministic_env.locations_to_state(goal_locations))
    self.assertEqual(r, REWARD_OF_LIVING + REWARD_OF_GOAL)
    self.assertEqual(total_reward, REWARD_OF_GOAL + 4 * REWARD_OF_LIVING)
def test_similar_transitions_probability_summed(self):
    grid = MapfGrid(['..',
                     '..'])
    env = MapfEnv(grid, 1, ((0, 0),), ((1, 1),), 0.1, REWARD_OF_CLASH,
                  REWARD_OF_GOAL, REWARD_OF_LIVING, OptimizationCriteria.Makespan)

    # This is a single-agent env, so the action vector holds a single action
    a = vector_action_to_integer((STAY,))

    # Transitions that reach the same next state should be merged into a single
    # entry whose probabilities are summed
    self.assertEqual(env.P[env.s][a],
                     [((1, False), env.s, REWARD_OF_LIVING, False)])
def test_switch_spots_is_a_collision(self):
    grid = MapfGrid(['..'])

    agents_starts = ((0, 0), (0, 1))
    agents_goals = ((0, 1), (0, 0))
    deterministic_env = MapfEnv(grid, 2, agents_starts, agents_goals, 0,
                                REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                                OptimizationCriteria.Makespan)

    s, r, done, _ = deterministic_env.step(vector_action_to_integer((RIGHT, LEFT)))

    # Assert the game terminated in a collision
    self.assertEqual(done, True)
    self.assertEqual(r, REWARD_OF_LIVING + REWARD_OF_CLASH)
def test_roni_scenario_with_id(self):
    # TODO: this test only passes when the first action in the ACTIONS array is STAY,
    #  fix it to work without relying on that.
    grid = MapfGrid(['.@.',
                     '.@.',
                     '...'])
    agents_starts = ((0, 0), (0, 2))
    agents_goals = ((2, 0), (2, 2))

    env = MapfEnv(grid, 2, agents_starts, agents_goals, 0.1, 0.01, -1, 1, -0.1)

    independent_joint_policy = solve_independently_and_cross(
        env, [[0], [1]], partial(value_iteration, 1.0), {})

    interesting_state = env.locations_to_state(((0, 0), (0, 2)))

    # Assert the independent joint policy simply chooses the most efficient action
    self.assertEqual(independent_joint_policy.act(interesting_state),
                     vector_action_to_integer((DOWN, DOWN)))

    # Assert no conflict
    self.assertEqual(detect_conflict(env, independent_joint_policy), None)
def create_sanity_mapf_env(n_rooms, room_size, n_agents, fail_prob,
                           reward_of_clash, reward_of_goal, reward_of_living,
                           optimization_criteria):
    single_room = ['.' * room_size] * room_size
    grid_lines = single_room[:]
    n_agents_per_room = int(n_agents / n_rooms)
    n_agents_last_room = n_agents - (n_agents_per_room * (n_rooms - 1))
    agents_starts = tuple()
    agents_goals = tuple()

    if n_agents_last_room == 0 or n_agents_per_room == 0:
        raise ValueError(
            f'asked for a sanity env with {n_rooms} rooms and {n_agents} agents - there are redundant rooms')

    # Concatenate the extra n-1 rooms to the first one. Rooms are separated by
    # a '@@' wall, except for the bottom row which is left open as a passage.
    for i in range(n_rooms - 1):
        for line_idx, line in enumerate(grid_lines[:-1]):
            grid_lines[line_idx] = line + '@@' + single_room[line_idx]
        grid_lines[-1] = grid_lines[-1] + '..' + single_room[-1]

    for i in range(n_rooms):
        # Set the new start and goal locations according to the current room offset
        map_file, scen_file = map_name_to_files(
            f'empty-{room_size}-{room_size}', i % 25 + 1)
        if i != n_rooms - 1:
            orig_agents_starts, orig_agents_goals = parse_scen_file(
                scen_file, n_agents_per_room)
        else:
            orig_agents_starts, orig_agents_goals = parse_scen_file(
                scen_file, n_agents_last_room)

        new_agents_starts = tuple()
        for start in orig_agents_starts:
            new_start = (start[0], start[1] + i * (len(single_room[0]) + 2))
            new_agents_starts += (new_start,)

        new_agents_goals = tuple()
        for goal in orig_agents_goals:
            new_goal = (goal[0], goal[1] + i * (len(single_room[0]) + 2))
            new_agents_goals += (new_goal,)

        agents_starts += new_agents_starts
        agents_goals += new_agents_goals

    grid = MapfGrid(grid_lines)
    return MapfEnv(grid, n_agents, agents_starts, agents_goals, fail_prob,
                   reward_of_clash, reward_of_goal, reward_of_living,
                   optimization_criteria)
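# A minimal usage sketch (assumes the MovingAI 'empty-8-8' benchmark map/scen
# files are available to map_name_to_files; the reward values are illustrative):
#
#   env = create_sanity_mapf_env(n_rooms=2, room_size=8, n_agents=4,
#                                fail_prob=0.1, reward_of_clash=-1000.0,
#                                reward_of_goal=100.0, reward_of_living=-1.0,
#                                optimization_criteria=OptimizationCriteria.Makespan)
#
# The resulting grid is two 8x8 rooms side by side, joined by an open passage
# on the bottom row, with 2 agents placed in each room from the scen files.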
def test_colliding_agents_state_is_terminal_and_negative_reward(self):
    map_file_path = os.path.abspath(
        os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))
    grid = MapfGrid(parse_map_file(map_file_path))

    # Agents start two cells apart; a (RIGHT, LEFT) step sends both towards (0, 1)
    agents_starts = ((0, 0), (0, 2))
    agents_goals = ((7, 7), (5, 5))

    env = MapfEnv(grid, 2, agents_starts, agents_goals, FAIL_PROB,
                  REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                  OptimizationCriteria.Makespan)

    transitions = [((round(prob, 2), collision), next_state, reward, done)
                   for ((prob, collision), next_state, reward, done)
                   in env.P[env.s][vector_action_to_integer((RIGHT, LEFT))]]

    self.assertIn(((0.64, True),
                   env.locations_to_state(((0, 1), (0, 1))),
                   REWARD_OF_LIVING + REWARD_OF_CLASH,
                   True),
                  set(transitions))
def test_reward_multiagent_makespan(self):
    grid = MapfGrid(['....',
                     '....',
                     '....',
                     '....'])
    start_locations = ((0, 0), (3, 3), (1, 1))
    goal_locations = ((0, 1), (1, 3), (1, 2))
    deterministic_env = MapfEnv(grid, 3, start_locations, goal_locations, 0,
                                REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                                OptimizationCriteria.Makespan)

    total_reward = 0
    right_up_right = vector_action_to_integer((RIGHT, UP, RIGHT))
    s, r, done, _ = deterministic_env.step(right_up_right)
    total_reward += r
    self.assertFalse(done)

    stay_up_stay = vector_action_to_integer((STAY, UP, STAY))
    s, r, done, _ = deterministic_env.step(stay_up_stay)
    total_reward += r

    self.assertEqual(s, deterministic_env.locations_to_state(goal_locations))
    self.assertTrue(done)

    # Under makespan, the living reward is paid once per time step, not per agent
    self.assertEqual(total_reward, 2 * REWARD_OF_LIVING + REWARD_OF_GOAL)
def get_local_view(env: MapfEnv, agent_indexes: list, **kwargs):
    """Return a new MapfEnv containing only the given agents, with the same grid and rewards."""
    fail_prob = kwargs.get('fail_prob', env.fail_prob)

    vector_local_agents_starts = tuple(itertools.compress(
        env.agents_starts,
        [x in agent_indexes for x in range(env.n_agents)]))

    vector_local_agents_goals = tuple(itertools.compress(
        env.agents_goals,
        [x in agent_indexes for x in range(env.n_agents)]))

    return MapfEnv(env.grid, len(agent_indexes), vector_local_agents_starts,
                   vector_local_agents_goals, fail_prob, env.reward_of_clash,
                   env.reward_of_goal, env.reward_of_living,
                   env.optimization_criteria)
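# Usage sketch (a hypothetical 2x2 two-agent env; reward values illustrative):
#
#   env = MapfEnv(MapfGrid(['..', '..']), 2, ((0, 0), (1, 1)), ((1, 1), (0, 0)),
#                 0.1, -1000.0, 100.0, -1.0, OptimizationCriteria.Makespan)
#   local = get_local_view(env, [0])                    # agent 0's single-agent projection
#   assert local.n_agents == 1
#   local_det = get_local_view(env, [1], fail_prob=0)   # kwargs may override fail_prob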
def create_mapf_env(map_name, scen_id, n_agents, fail_prob, reward_of_clash,
                    reward_of_goal, reward_of_living, optimization_criteria):
    if map_name.startswith('sanity'):
        [n_rooms, room_size] = [int(n) for n in map_name.split('-')[1:]]
        return create_sanity_mapf_env(n_rooms, room_size, n_agents, fail_prob,
                                      reward_of_clash, reward_of_goal,
                                      reward_of_living, optimization_criteria)

    map_file, scen_file = map_name_to_files(map_name, scen_id)
    grid = MapfGrid(parse_map_file(map_file))
    agents_starts, agents_goals = parse_scen_file(scen_file, n_agents)

    # The scen file may contain fewer agents than requested
    n_agents = len(agents_goals)

    env = MapfEnv(grid, n_agents, agents_starts, agents_goals, fail_prob,
                  reward_of_clash, reward_of_goal, reward_of_living,
                  optimization_criteria)
    return env
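# Usage sketch (assumes the MovingAI 'empty-8-8' map/scen files are available;
# reward values illustrative):
#
#   env = create_mapf_env('empty-8-8', scen_id=1, n_agents=2, fail_prob=0.1,
#                         reward_of_clash=-1000.0, reward_of_goal=100.0,
#                         reward_of_living=-1.0,
#                         optimization_criteria=OptimizationCriteria.Makespan)
#
# Map names of the form 'sanity-<n_rooms>-<room_size>' are routed to
# create_sanity_mapf_env instead of the benchmark files.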
def test_dijkstra_simple_env(self):
    """Test the Dijkstra heuristic on an environment small enough to verify by eye"""
    grid = MapfGrid(['..@..',
                     '..@..',
                     '.....'])
    agents_starts = ((0, 0),)
    agents_goals = ((0, 4),)

    env = MapfEnv(grid, 1, agents_starts, agents_goals, 0, 0, -1000, 0, -1)

    dijkstra_func = dijkstra_min_heuristic(env)
    vi_policy = value_iteration(1.0, env, {})

    # In a deterministic single-agent env the heuristic should equal the optimal value
    for i in range(env.nS):
        self.assertEqual(dijkstra_func(i), vi_policy.v[i])
def test_dijkstra_large_goal_reward(self):
    grid = MapfGrid(['..@..',
                     '..@..',
                     '.....'])
    agents_starts = ((0, 0),)
    agents_goals = ((0, 4),)

    env = MapfEnv(grid, 1, agents_starts, agents_goals, 0, 0, -1000, 100, -1)

    dijkstra_func = dijkstra_min_heuristic(env)
    vi_policy = value_iteration(1.0, env, {})

    for i in range(env.nS):
        self.assertEqual(dijkstra_func(i), vi_policy.v[i])
def deterministic_relaxation_prioritized_value_iteration_heuristic(
        gamma: float, env: MapfEnv) -> Callable[[int], float]:
    deterministic_env = MapfEnv(env.grid,
                                env.n_agents,
                                env.agents_starts,
                                env.agents_goals,
                                0,
                                0,
                                env.reward_of_clash,
                                env.reward_of_goal,
                                env.reward_of_living)
    # TODO: consider using RTDP instead of PVI here, this is theoretically bad
    #  but practically may give better results
    policy = prioritized_value_iteration(gamma, deterministic_env, {})

    def heuristic_function(s):
        return policy.v[s]

    return heuristic_function
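# How this heuristic factory is meant to be consumed - a sketch only; the exact
# solver signature below is assumed, based on the other heuristic factories in
# this repo:
#
#   heuristic = partial(deterministic_relaxation_prioritized_value_iteration_heuristic, 1.0)
#   policy = rtdp(heuristic, 1000, 1.0, env, {})
#
# The relaxed env is deterministic, so its optimal value function is an
# optimistic estimate for the stochastic env and can seed the solver's V table.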
def test_detect_conflict_finds_classical_conflict(self):
    grid = MapfGrid(['...',
                     '@.@',
                     '...'])
    agents_starts = ((0, 0), (0, 2))
    agents_goals = ((2, 0), (2, 2))
    env = MapfEnv(grid, 2, agents_starts, agents_goals, 0, 0, -1, 1, -0.01)

    policy1 = {
        0: ACTIONS.index(RIGHT),
        1: ACTIONS.index(STAY),
        2: ACTIONS.index(DOWN),
        3: ACTIONS.index(DOWN),
        4: ACTIONS.index(LEFT),
        5: ACTIONS.index(RIGHT),
        6: ACTIONS.index(LEFT),
    }

    policy2 = {
        0: ACTIONS.index(RIGHT),
        1: ACTIONS.index(RIGHT),
        2: ACTIONS.index(DOWN),
        3: ACTIONS.index(DOWN),
        4: ACTIONS.index(RIGHT),
        5: ACTIONS.index(LEFT),
        6: ACTIONS.index(STAY),
    }

    joint_policy = CrossedPolicy(env, [DictPolicy(get_local_view(env, [0]), 1.0, policy1),
                                       DictPolicy(get_local_view(env, [1]), 1.0, policy2)],
                                 [[0], [1]])

    aux_local_env = get_local_view(env, [0])

    # Both agents move towards (0, 1) - a classical vertex conflict
    self.assertEqual(
        detect_conflict(env, joint_policy),
        ((0,
          aux_local_env.locations_to_state(((0, 0),)),
          aux_local_env.locations_to_state(((0, 1),))),
         (1,
          aux_local_env.locations_to_state(((0, 2),)),
          aux_local_env.locations_to_state(((0, 1),)))))
def test_detect_conflict_return_none_when_no_conflict(self):
    grid = MapfGrid(['...',
                     '...',
                     '...'])
    agents_starts = ((0, 0), (0, 2))
    agents_goals = ((2, 0), (2, 2))
    env = MapfEnv(grid, 2, agents_starts, agents_goals, 0, 0, -1, 1, -0.01)

    # Both agents simply walk down their own columns - no conflict
    policy1 = {s: ACTIONS.index(DOWN) for s in range(9)}
    policy2 = {s: ACTIONS.index(DOWN) for s in range(9)}

    joint_policy = CrossedPolicy(env, [DictPolicy(get_local_view(env, [0]), 1.0, policy1),
                                       DictPolicy(get_local_view(env, [1]), 1.0, policy2)],
                                 [[0], [1]])

    self.assertEqual(detect_conflict(env, joint_policy), None)
def test_action_from_terminal_state_has_no_effect(self):
    grid = MapfGrid(['..',
                     '..'])
    env = MapfEnv(grid, 1, ((0, 0),), ((1, 1),), 0, REWARD_OF_CLASH,
                  REWARD_OF_GOAL, REWARD_OF_LIVING, OptimizationCriteria.Makespan)

    state, reward, done, _ = env.step(vector_action_to_integer((RIGHT,)))
    self.assertEqual(reward, REWARD_OF_LIVING)
    self.assertEqual(done, False)

    state, reward, done, _ = env.step(vector_action_to_integer((DOWN,)))
    self.assertEqual(reward, REWARD_OF_LIVING + REWARD_OF_GOAL)
    self.assertEqual(done, True)

    # Now, after the game is finished - do another step and make sure it has no effect
    state_after_done, reward_after_done, done_after_done, _ = env.step(
        vector_action_to_integer((UP,)))
    self.assertEqual(state_after_done, state)
    self.assertEqual(done_after_done, True)
    self.assertEqual(reward_after_done, 0)

    # And once more, this time with an action that would have reached the goal
    state_after_done, reward_after_done, done_after_done, _ = env.step(
        vector_action_to_integer((DOWN,)))
    self.assertEqual(state_after_done, state)
    self.assertEqual(done_after_done, True)
    self.assertEqual(reward_after_done, 0)
    long_ma_rtdp_min_dijkstra_describer,
    id_rtdp_describer,
]

strong_tested_solvers = [
    long_rtdp_stop_no_improvement_sum_heuristic_describer,
    long_ma_rtdp_sum_pvi_describer,
    long_id_rtdp_sum_pvi_describer,
    long_rtdp_stop_no_improvement_min_dijkstra_heuristic_describer,
    long_rtdp_stop_no_improvement_sum_dijkstra_heuristic_describer,
    long_ma_rtdp_sum_dijkstra_describer
]

all_tested_solvers = weak_tested_solvers + mid_tested_solvers + strong_tested_solvers

easy_envs = [
    (MapfEnv(MapfGrid(['.' * 8] * 8),
             1, ((7, 0),), ((0, 7),), 0.1, 0.1, -1000, 0, -1),
     'empty_grid_single_agent'),
    (MapfEnv(MapfGrid(['..@...',
                       '..@...',
                       '......',
                       '..@...',
                       '..@...']),
             2, ((2, 0), (2, 5)), ((2, 5), (2, 0)), 0, 0, -0.001, 0, -1),
     'symmetrical bottle-neck deterministic'),
    (MapfEnv(MapfGrid(['..@...',
                       '..@...',
                       '......',
                       '..@...',
                       '..@...']),
             2, ((2, 0), (2, 5)), ((2, 5), (2, 0)), 0, 0, -0.001, 100, -1),
     'symmetrical bottle-neck deterministic large goal reward'),
    (MapfEnv(MapfGrid(['..@...',
                       '..@...',
                       '......',
                       '..@...',
                       '..@...']),
             2, ((2, 0), (2, 5)), ((2, 5), (2, 0)), 0.1, 0.1, -0.001, 0, -1),
     'symmetrical bottle-neck stochastic'),
    (MapfEnv(MapfGrid(['..@..',
                       '..@..',
                       '.....',
                       '..@..',
                       '..@..']),
             2, ((2, 0), (2, 4)), ((2, 4), (2, 0)), 0, 0, -0.001, 0, -1),
     'Asymmetrical bottle-neck deterministic'),
    (MapfEnv(MapfGrid(['..@..',
                       '..@..',
                       '.....',
                       '..@..',
                       '..@..']),
             2, ((2, 0), (2, 4)), ((2, 4), (2, 0)), 0, 0,
def lrtdp(heuristic_function: Callable[[MapfEnv], Callable[[int], float]],
          max_iterations: int,
          gamma: float,
          epsilon: float,
          env: MapfEnv,
          info: Dict) -> Policy:
    info['iterations'] = []

    # Initialize V to an upper bound
    env.reset()
    initial_state = env.s
    policy = LrtdpPolicy(env, gamma, heuristic_function(env))

    # Follow the greedy policy; for each transition do a Bellman update on V
    n_iterations = 0
    while initial_state not in policy.solved and n_iterations < max_iterations:
        n_iterations += 1
        s = env.s
        start = time.time()
        path = []

        # LRTDP trial
        while s not in policy.solved:
            a = greedy_action(env, s, policy.v, gamma)
            path.append((s, a))

            new_v_s = sum([prob * (reward + gamma * policy.v[next_state])
                           for prob, next_state, reward, done in env.P[s][a]])
            policy.v_partial_table[s] = new_v_s

            # Simulate the step and sample a new state
            s, r, done, _ = env.step(a)
            if done:
                # Add the terminal state to the path; the action does not matter
                path.append((s, 0))
                break

        # Trial finished - try to label states as solved backwards along the path
        while path:
            state, action = path.pop()
            if not check_solved(policy, state, epsilon):
                break

        info['iterations'].append({
            'n_moves': len(path),
            'time': round(time.time() - start, 2),
            'n_states_solved': len(policy.solved),
            'final_reward': r,
        })
        env.reset()

    env.reset()
    return policy
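# Usage sketch (illustrative values; dijkstra_min_heuristic matches the
# expected heuristic-factory signature of env -> (state -> float)):
#
#   info = {}
#   policy = lrtdp(dijkstra_min_heuristic, max_iterations=1000,
#                  gamma=1.0, epsilon=0.01, env=env, info=info)
#   print(f"{len(info['iterations'])} trials until the start state was labeled solved")
#
# Each trial greedily simulates from the start state while Bellman-updating the
# visited states, then labels states as solved backwards along the trial path
# (Labeled RTDP, Bonet & Geffner 2003).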
def manhattan_distance(env: MapfEnv, s, a1, a2):
    """Return the Manhattan distance between the two given agents in the given state"""
    locations = env.state_to_locations(s)

    return abs(locations[a1][0] - locations[a2][0]) + abs(locations[a1][1] - locations[a2][1])
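# Worked example (a hypothetical open 3x3 env): agents at (0, 0) and (2, 2)
# are |0 - 2| + |0 - 2| = 4 cells apart.
#
#   env = MapfEnv(MapfGrid(['...', '...', '...']), 2, ((0, 0), (2, 2)),
#                 ((2, 2), (0, 0)), 0, -1000, 100, -1, OptimizationCriteria.Makespan)
#   s = env.locations_to_state(((0, 0), (2, 2)))
#   assert manhattan_distance(env, s, 0, 1) == 4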
def test_couple_detect_conflict_3_agents_multiple_agents_in_group(self):
    """Test detecting a conflict for a specific couple of agents out of three.

    The test makes sure that agent 0 has no conflict with agents 1 and 2,
    while agents 1 and 2 do conflict with each other.

    Here agent 1 is part of a group which contains both agents 0 and 1
    ([0, 1]), so agent 1's index inside its group is 1 rather than 0.
    This case catches a bug which existed previously.
    """
    grid = MapfGrid(['...',
                     '...',
                     '...'])
    agents_starts = ((0, 0), (2, 0), (2, 2))
    agents_goals = ((0, 2), (2, 2), (2, 0))

    env = MapfEnv(grid, 3, agents_starts, agents_goals, 0, 0, -1, 1, -0.01)
    single_agent_env = MapfEnv(grid, 1, (agents_starts[0],), (agents_goals[0],),
                               0, 0, -1, 1, -0.01)
    env01 = get_local_view(env, [0, 1])

    # >>S
    # SSS
    # SSS
    policy0 = {
        0: ACTIONS.index(RIGHT),
        1: ACTIONS.index(STAY),
        2: ACTIONS.index(STAY),
        3: ACTIONS.index(RIGHT),
        4: ACTIONS.index(STAY),
        5: ACTIONS.index(STAY),
        6: ACTIONS.index(STAY),
        7: ACTIONS.index(STAY),
        8: ACTIONS.index(STAY),
    }

    # SSS
    # SSS
    # >>S
    policy1 = {
        0: ACTIONS.index(STAY),
        1: ACTIONS.index(STAY),
        2: ACTIONS.index(RIGHT),
        3: ACTIONS.index(STAY),
        4: ACTIONS.index(STAY),
        5: ACTIONS.index(RIGHT),
        6: ACTIONS.index(STAY),
        7: ACTIONS.index(STAY),
        8: ACTIONS.index(STAY),
    }

    # policy01 is the cross of the policies of agent 0 and agent 1
    policy01 = {}
    for s0 in range(9):
        for s1 in range(9):
            joint_state = env01.locations_to_state(
                (single_agent_env.state_to_locations(s0)[0],
                 single_agent_env.state_to_locations(s1)[0]))
            policy01[joint_state] = vector_action_to_integer(
                (integer_action_to_vector(policy0[s0], 1)[0],
                 integer_action_to_vector(policy1[s1], 1)[0]))

    # SSS
    # SSS
    # S<<
    policy2 = {
        0: ACTIONS.index(STAY),
        1: ACTIONS.index(STAY),
        2: ACTIONS.index(STAY),
        3: ACTIONS.index(STAY),
        4: ACTIONS.index(STAY),
        5: ACTIONS.index(LEFT),
        6: ACTIONS.index(STAY),
        7: ACTIONS.index(STAY),
        8: ACTIONS.index(LEFT),
    }

    joint_policy = CrossedPolicy(env, [DictPolicy(env01, 1.0, policy01),
                                       DictPolicy(get_local_view(env, [2]), 1.0, policy2)],
                                 [[0, 1], [2]])

    aux_local_env = get_local_view(env, [0])

    # Assert a conflict is found for agents 1 and 2
    self.assertEqual(
        couple_detect_conflict(env, joint_policy, 2, 1),
        ((2,
          aux_local_env.locations_to_state(((2, 2),)),
          aux_local_env.locations_to_state(((2, 1),))),
         (1,
          aux_local_env.locations_to_state(((2, 0),)),
          aux_local_env.locations_to_state(((2, 1),)))))

    # Assert no conflict is found for agents 0 and 1
    self.assertIsNone(couple_detect_conflict(env, joint_policy, 0, 1))

    # Assert no conflict is found for agents 0 and 2
    self.assertIsNone(couple_detect_conflict(env, joint_policy, 0, 2))
def test_transition_function_empty_grid(self):
    """Assert the basic steps are done right.

    * Define an empty 8x8 environment with two agents starting at (0,0),(7,7)
      and aiming to reach (0,2),(5,7).
    * Perform one (RIGHT, UP) step and assert that the transitions are correct.
    * Perform another (RIGHT, UP) step from the most probable next state from
      before ((0,1), (6,7)) and assert that the transitions are correct again,
      including the terminal one.
    """
    map_file_path = os.path.abspath(
        os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))
    grid = MapfGrid(parse_map_file(map_file_path))

    agents_starts = ((0, 0), (7, 7))
    agents_goals = ((0, 2), (5, 7))

    env = MapfEnv(grid, 2, agents_starts, agents_goals, FAIL_PROB,
                  REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                  OptimizationCriteria.Makespan)

    first_step_transitions = [((round(prob, 2), collision), next_state, reward, done)
                              for ((prob, collision), next_state, reward, done)
                              in env.P[env.s][vector_action_to_integer((RIGHT, UP))]]

    self.assertEqual(set(first_step_transitions), {
        ((0.64, False), env.locations_to_state(((0, 1), (6, 7))), REWARD_OF_LIVING, False),  # (RIGHT, UP)
        ((0.08, False), env.locations_to_state(((1, 0), (6, 7))), REWARD_OF_LIVING, False),  # (DOWN, UP)
        ((0.08, False), env.locations_to_state(((0, 0), (6, 7))), REWARD_OF_LIVING, False),  # (UP, UP)
        ((0.08, False), env.locations_to_state(((0, 1), (7, 7))), REWARD_OF_LIVING, False),  # (RIGHT, RIGHT)
        ((0.08, False), env.locations_to_state(((0, 1), (7, 6))), REWARD_OF_LIVING, False),  # (RIGHT, LEFT)
        ((0.01, False), env.locations_to_state(((1, 0), (7, 7))), REWARD_OF_LIVING, False),  # (DOWN, RIGHT)
        ((0.01, False), env.locations_to_state(((1, 0), (7, 6))), REWARD_OF_LIVING, False),  # (DOWN, LEFT)
        ((0.01, False), env.locations_to_state(((0, 0), (7, 7))), REWARD_OF_LIVING, False),  # (UP, RIGHT)
        ((0.01, False), env.locations_to_state(((0, 0), (7, 6))), REWARD_OF_LIVING, False),  # (UP, LEFT)
    })

    wish_state = env.locations_to_state(((0, 1), (6, 7)))
    second_step_transitions = [((round(prob, 2), collision), next_state, reward, done)
                               for ((prob, collision), next_state, reward, done)
                               in env.P[wish_state][vector_action_to_integer((RIGHT, UP))]]

    self.assertEqual(set(second_step_transitions), {
        ((0.64, False), env.locations_to_state(((0, 2), (5, 7))), REWARD_OF_LIVING + REWARD_OF_GOAL, True),  # (RIGHT, UP)
        ((0.08, False), env.locations_to_state(((1, 1), (5, 7))), REWARD_OF_LIVING, False),  # (DOWN, UP)
        ((0.08, False), env.locations_to_state(((0, 1), (5, 7))), REWARD_OF_LIVING, False),  # (UP, UP)
        ((0.08, False), env.locations_to_state(((0, 2), (6, 7))), REWARD_OF_LIVING, False),  # (RIGHT, RIGHT)
        ((0.08, False), env.locations_to_state(((0, 2), (6, 6))), REWARD_OF_LIVING, False),  # (RIGHT, LEFT)
        ((0.01, False), env.locations_to_state(((1, 1), (6, 7))), REWARD_OF_LIVING, False),  # (DOWN, RIGHT)
        ((0.01, False), env.locations_to_state(((1, 1), (6, 6))), REWARD_OF_LIVING, False),  # (DOWN, LEFT)
        ((0.01, False), env.locations_to_state(((0, 1), (6, 7))), REWARD_OF_LIVING, False),  # (UP, RIGHT)
        ((0.01, False), env.locations_to_state(((0, 1), (6, 6))), REWARD_OF_LIVING, False),  # (UP, LEFT)
    })
def test_couple_detect_conflict_3_agents(self):
    """Test detecting a conflict for a specific couple of agents out of three.

    The test makes sure that agent 0 has no conflict with agents 1 and 2,
    while agents 1 and 2 do conflict with each other.
    """
    grid = MapfGrid(['...',
                     '...',
                     '...'])
    agents_starts = ((0, 0), (2, 0), (2, 2))
    agents_goals = ((0, 2), (2, 2), (2, 0))

    env = MapfEnv(grid, 3, agents_starts, agents_goals, 0, 0, -1, 1, -0.01)

    # >>S
    # SSS
    # SSS
    policy0 = {
        0: ACTIONS.index(RIGHT),
        1: ACTIONS.index(STAY),
        2: ACTIONS.index(STAY),
        3: ACTIONS.index(RIGHT),
        4: ACTIONS.index(STAY),
        5: ACTIONS.index(STAY),
        6: ACTIONS.index(STAY),
        7: ACTIONS.index(STAY),
        8: ACTIONS.index(STAY),
    }

    # SSS
    # SSS
    # >>S
    policy1 = {
        0: ACTIONS.index(STAY),
        1: ACTIONS.index(STAY),
        2: ACTIONS.index(RIGHT),
        3: ACTIONS.index(STAY),
        4: ACTIONS.index(STAY),
        5: ACTIONS.index(RIGHT),
        6: ACTIONS.index(STAY),
        7: ACTIONS.index(STAY),
        8: ACTIONS.index(STAY),
    }

    # SSS
    # SSS
    # S<<
    policy2 = {
        0: ACTIONS.index(STAY),
        1: ACTIONS.index(STAY),
        2: ACTIONS.index(STAY),
        3: ACTIONS.index(STAY),
        4: ACTIONS.index(STAY),
        5: ACTIONS.index(LEFT),
        6: ACTIONS.index(STAY),
        7: ACTIONS.index(STAY),
        8: ACTIONS.index(LEFT),
    }

    joint_policy = CrossedPolicy(env, [DictPolicy(get_local_view(env, [0]), 1.0, policy0),
                                       DictPolicy(get_local_view(env, [1]), 1.0, policy1),
                                       DictPolicy(get_local_view(env, [2]), 1.0, policy2)],
                                 [[0], [1], [2]])

    aux_local_env = get_local_view(env, [0])

    # Assert a conflict is found for agents 1 and 2
    self.assertEqual(
        couple_detect_conflict(env, joint_policy, 1, 2),
        ((1,
          aux_local_env.locations_to_state(((2, 0),)),
          aux_local_env.locations_to_state(((2, 1),))),
         (2,
          aux_local_env.locations_to_state(((2, 2),)),
          aux_local_env.locations_to_state(((2, 1),)))))

    # Assert no conflict is found for agents 0 and 1
    self.assertIsNone(couple_detect_conflict(env, joint_policy, 0, 1))

    # Assert no conflict is found for agents 0 and 2
    self.assertIsNone(couple_detect_conflict(env, joint_policy, 0, 2))
def test_predecessors(self):
    """Assert the predecessors function works correctly.

    Create an environment which looks like this (a 3x4 grid):

        ....
        ..0.
        .1..

    Agent 0 is at (1,2) and agent 1 is at (2,1).

    The possible predecessor locations for agent 0 are (0,2), (1,1), (1,3),
    (2,2) and (1,2) itself; for agent 1 they are (2,2), (2,0), (1,1) and
    (2,1) itself.

    Therefore, the predecessor states of the initial state correspond to the
    cross product of these two sets - 20 location pairs in total.
    """
    grid = MapfGrid(['....',
                     '....',
                     '....'])

    agents_starts = ((1, 2), (2, 1))
    # The goals don't matter for this test
    agents_goals = ((0, 0), (2, 3))

    env = MapfEnv(grid, 2, agents_starts, agents_goals, 0, REWARD_OF_CLASH,
                  REWARD_OF_GOAL, REWARD_OF_LIVING, OptimizationCriteria.Makespan)

    expected_locations = [((0, 2), (2, 2)), ((0, 2), (2, 0)), ((0, 2), (1, 1)), ((0, 2), (2, 1)),
                          ((1, 1), (2, 2)), ((1, 1), (2, 0)), ((1, 1), (1, 1)), ((1, 1), (2, 1)),
                          ((1, 3), (2, 2)), ((1, 3), (2, 0)), ((1, 3), (1, 1)), ((1, 3), (2, 1)),
                          ((2, 2), (2, 2)), ((2, 2), (2, 0)), ((2, 2), (1, 1)), ((2, 2), (2, 1)),
                          ((1, 2), (2, 2)), ((1, 2), (2, 0)), ((1, 2), (1, 1)), ((1, 2), (2, 1))]

    expected_states = [env.locations_to_state(loc) for loc in expected_locations]

    self.assertSetEqual(set(expected_states), set(env.predecessors(env.s)))