Example #1
def test_corridor_switch_no_clash_possible(solver_describer: SolverDescriber):
    grid = MapfGrid(['...', '@.@'])
    agents_starts = ((0, 0), (0, 2))
    agents_goals = ((0, 2), (0, 0))

    # These parameters make sure the solver avoids collisions regardless of reward efficiency
    env = MapfEnv(grid, 2, agents_starts, agents_goals, 0.1, 0.1, -0.001, 0,
                  -1)

    info = {}
    policy = solver_describer.func(env, info)

    # Assert no conflict is possible
    interesting_state = env.locations_to_state(((1, 1), (0, 1)))
    expected_possible_actions = [
        vector_action_to_integer((STAY, UP)),
        vector_action_to_integer((DOWN, UP))
    ]

    assert policy.act(interesting_state) in expected_possible_actions

    # Check the policy performance
    reward, clashed, _ = evaluate_policy(policy, 100, 100)

    # Make sure no clash happened
    assert not clashed

    # Assert the reward is reasonable
    assert reward >= 100.0 * env.reward_of_living
Example #2
    def test_copy_mapf_env(self):
        grid = MapfGrid(['....', '....', '....', '....', '....'])
        env = MapfEnv(grid, 1, ((0, 0), ), ((4, 0), ), 0, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        env.step(vector_action_to_integer((RIGHT, )))

        env_copy = copy(env)
        env_copy.step(vector_action_to_integer((RIGHT, )))
Example #3
    def test_reward_multiagent_soc_stay_actions(self):
        grid = MapfGrid(['....', '....', '....', '....'])

        start_locations = ((0, 0), (3, 3), (1, 1))
        goal_locations = ((0, 1), (1, 3), (1, 2))

        determinstic_env = MapfEnv(grid, 3, start_locations, goal_locations, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING, OptimizationCriteria.SoC)

        right_stay_stay = vector_action_to_integer((RIGHT, STAY, STAY))
        s, r, done, _ = determinstic_env.step(right_stay_stay)
        self.assertEqual(r, -3)
Example #4
    def test_reward_single_agent_makespan(self):
        grid = MapfGrid(['....', '....', '....', '....', '....'])

        start_locations = ((0, 0), )
        goal_locations = ((4, 0), )

        determinstic_env = MapfEnv(grid, 1, start_locations, goal_locations, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING,
                                   OptimizationCriteria.Makespan)
        total_reward = 0
        down_action = vector_action_to_integer((DOWN, ))
        _, r, _, _ = determinstic_env.step(down_action)
        total_reward += r
        _, r, _, _ = determinstic_env.step(down_action)
        total_reward += r
        _, r, _, _ = determinstic_env.step(down_action)
        total_reward += r
        s, r, done, _ = determinstic_env.step(down_action)
        total_reward += r

        self.assertEqual(s,
                         determinstic_env.locations_to_state(goal_locations))
        self.assertEqual(r, REWARD_OF_LIVING + REWARD_OF_GOAL)

        self.assertEqual(total_reward, REWARD_OF_GOAL + 4 * REWARD_OF_LIVING)
Example #5
    def test_similar_transitions_probability_summed(self):
        grid = MapfGrid(['..', '..'])
        env = MapfEnv(grid, 1, ((0, 0), ), ((1, 1), ), 0.1, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        a = vector_action_to_integer((STAY, STAY))
        self.assertEqual(env.P[env.s][a],
                         [((1, False), env.s, REWARD_OF_LIVING, False)])
Example #6
    def test_switch_spots_is_a_collision(self):
        grid = MapfGrid(['..'])

        agents_starts = (
            (0, 0),
            (0, 1),
        )
        agents_goals = ((0, 1), (0, 0))

        determinstic_env = MapfEnv(grid, 2, agents_starts, agents_goals, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING,
                                   OptimizationCriteria.Makespan)

        s, r, done, _ = determinstic_env.step(
            vector_action_to_integer((RIGHT, LEFT)))

        # Assert the game terminated in a collision
        self.assertEqual(done, True)
        self.assertEqual(r, REWARD_OF_LIVING + REWARD_OF_CLASH)
Example #7
    def test_roni_scenario_with_id(self):
        # TODO: this test only passes when the first action in the ACTIONS array is STAY,
        #  fix it to work without this workaround
        grid = MapfGrid(['.@.', '.@.', '...'])
        agents_starts = ((0, 0), (0, 2))
        agents_goals = ((2, 0), (2, 2))

        env = MapfEnv(grid, 2, agents_starts, agents_goals, 0.1, 0.01, -1, 1,
                      -0.1)

        independent_joiont_policy = solve_independently_and_cross(
            env, [[0], [1]], partial(value_iteration, 1.0), {})

        interesting_state = env.locations_to_state(((0, 0), (0, 2)))

        # Assert independent_joint_policy just chooses the most efficient action
        self.assertEqual(independent_joiont_policy.act(interesting_state),
                         vector_action_to_integer((DOWN, DOWN)))

        # Assert no conflict
        self.assertEqual(detect_conflict(env, independent_joiont_policy), None)
Example #8
def create_sanity_mapf_env(n_rooms, room_size, n_agents, fail_prob,
                           reward_of_clash, reward_of_goal, reward_of_living,
                           optimization_criteria):
    single_room = ['.' * room_size] * room_size
    grid_lines = single_room[:]
    n_agents_per_room = int(n_agents / n_rooms)
    n_agents_last_room = n_agents - (n_agents_per_room * (n_rooms - 1))
    agents_starts = tuple()
    agents_goals = tuple()

    if n_agents_last_room == 0 or n_agents_per_room == 0:
        raise ValueError(
            f"asked for a sanity env with {n_rooms} rooms  and {n_agents} agents, There are redundant rooms"
        )

    # concatenate the remaining n-1 rooms onto the first one, forming a single grid
    for i in range(n_rooms - 1):
        # Add the extra room to the map
        for line_idx, line in enumerate(grid_lines[:-1]):
            grid_lines[line_idx] = line + '@@' + single_room[line_idx]

        grid_lines[-1] = grid_lines[-1] + '..' + single_room[-1]

    for i in range(n_rooms):
        # Set the new start and goal locations according to current offset
        map_file, scen_file = map_name_to_files(
            f'empty-{room_size}-{room_size}', i % 25 + 1)
        if i != n_rooms - 1:
            orig_agents_starts, orig_agents_goals = parse_scen_file(
                scen_file, n_agents_per_room)
        else:
            orig_agents_starts, orig_agents_goals = parse_scen_file(
                scen_file, n_agents_last_room)

        new_agents_starts = tuple()
        for start in orig_agents_starts:
            new_start = (start[0], start[1] + (i) * (len(single_room[0]) + 2))
            new_agents_starts += (new_start, )

        new_agents_goals = tuple()
        for goal in orig_agents_goals:
            new_goal = (goal[0], goal[1] + (i) * (len(single_room[0]) + 2))
            new_agents_goals += (new_goal, )

        agents_starts += new_agents_starts
        agents_goals += new_agents_goals

    grid = MapfGrid(grid_lines)

    return MapfEnv(grid, n_agents, agents_starts, agents_goals, fail_prob,
                   reward_of_clash, reward_of_goal, reward_of_living,
                   optimization_criteria)
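
A minimal usage sketch of the factory above; the room count, room size, agent count, and fail probability below are illustrative assumptions, and the reward constants are the ones used throughout these examples.

# Hypothetical call: two 8x8 rooms connected through the opening in the bottom row,
# four agents split evenly between the rooms, deterministic transitions.
env = create_sanity_mapf_env(n_rooms=2,
                             room_size=8,
                             n_agents=4,
                             fail_prob=0,
                             reward_of_clash=REWARD_OF_CLASH,
                             reward_of_goal=REWARD_OF_GOAL,
                             reward_of_living=REWARD_OF_LIVING,
                             optimization_criteria=OptimizationCriteria.Makespan)
env.reset()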
Example #9
    def test_colliding_agents_state_is_terminal_and_negative_reward(self):
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))

        grid = MapfGrid(parse_map_file(map_file_path))

        # agents are starting at (0, 0) and (0, 2)
        agent_starts = ((0, 0), (0, 2))
        agents_goals = ((7, 7), (5, 5))

        env = MapfEnv(grid, 2, agent_starts, agents_goals, FAIL_PROB,
                      REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)
        transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[env.s][vector_action_to_integer((RIGHT, LEFT))]
        ]

        self.assertIn(((0.64, True), env.locations_to_state(
            ((0, 1), (0, 1))), REWARD_OF_LIVING + REWARD_OF_CLASH, True),
                      set(transitions))
Example #10
    def test_reawrd_multiagent_makespan(self):
        grid = MapfGrid(['....', '....', '....', '....'])

        start_locations = ((0, 0), (3, 3), (1, 1))
        goal_locations = ((0, 1), (1, 3), (1, 2))

        determinstic_env = MapfEnv(grid, 3, start_locations, goal_locations, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING,
                                   OptimizationCriteria.Makespan)

        total_reward = 0
        right_up_right = vector_action_to_integer((RIGHT, UP, RIGHT))
        s, r, done, _ = determinstic_env.step(right_up_right)
        total_reward += r
        self.assertFalse(done)

        stay_up_stay = vector_action_to_integer((STAY, UP, STAY))
        s, r, done, _ = determinstic_env.step(stay_up_stay)
        total_reward += r
        self.assertEqual(s,
                         determinstic_env.locations_to_state(goal_locations))
        self.assertTrue(done)
        self.assertEqual(total_reward, 2 * REWARD_OF_LIVING + REWARD_OF_GOAL)
Example #11
def get_local_view(env: MapfEnv, agent_indexes: list, **kwargs):
    fail_prob = kwargs.get('fail_prob', env.fail_prob)

    vector_local_agents_starts = tuple(
        itertools.compress(
            env.agents_starts,
            [1 if x in agent_indexes else 0 for x in range(env.n_agents)]))

    vector_local_agents_goals = tuple(
        itertools.compress(
            env.agents_goals,
            [1 if x in agent_indexes else 0 for x in range(env.n_agents)]))

    return MapfEnv(env.grid, len(agent_indexes), vector_local_agents_starts,
                   vector_local_agents_goals, fail_prob, env.reward_of_clash,
                   env.reward_of_goal, env.reward_of_living,
                   env.optimization_criteria)
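
A hedged sketch of how this projection is typically consumed; the toy two-agent environment below is an illustrative assumption.

# Hypothetical usage: project a two-agent environment onto agent 0 only.
grid = MapfGrid(['..', '..'])
env = MapfEnv(grid, 2, ((0, 0), (1, 1)), ((1, 1), (0, 0)), 0.1,
              REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
              OptimizationCriteria.Makespan)

local_env = get_local_view(env, [0])  # single-agent copy sharing the grid and rewards
assert local_env.n_agents == 1

# The fail probability can be overridden through kwargs, e.g. for a deterministic relaxation.
deterministic_local = get_local_view(env, [0], fail_prob=0)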
Example #12
def create_mapf_env(map_name, scen_id, n_agents, fail_prob, reward_of_clash,
                    reward_of_goal, reward_of_living, optimization_criteria):
    if map_name.startswith('sanity'):
        [n_rooms, room_size] = [int(n) for n in map_name.split('-')[1:]]
        return create_sanity_mapf_env(n_rooms, room_size, n_agents, fail_prob,
                                      reward_of_clash, reward_of_goal,
                                      reward_of_living, optimization_criteria)

    map_file, scen_file = map_name_to_files(map_name, scen_id)
    grid = MapfGrid(parse_map_file(map_file))
    agents_starts, agents_goals = parse_scen_file(scen_file, n_agents)
    n_agents = len(agents_goals)

    env = MapfEnv(grid, n_agents, agents_starts, agents_goals, fail_prob,
                  reward_of_clash, reward_of_goal, reward_of_living,
                  optimization_criteria)

    return env
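
A hedged usage sketch; the scenario id, agent count, and fail probability are illustrative, while the map names follow the conventions visible above.

# Benchmark map: the grid and the agents' starts/goals are parsed from the map/scen files.
env = create_mapf_env('empty-8-8', 1, 2, 0.1,
                      REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

# Sanity map: a 'sanity-<n_rooms>-<room_size>' name is routed to create_sanity_mapf_env.
sanity_env = create_mapf_env('sanity-2-8', 1, 2, 0.1,
                             REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                             OptimizationCriteria.Makespan)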
Example #13
    def test_dijkstra_simple_env(self):
        """Test dijkstra algorithm on an environment which I can eye-ball testing"""
        grid = MapfGrid([
            '..@..',
            '..@..',
            '.....',
        ])

        agents_starts = ((0, 0), )
        agents_goals = ((0, 4), )

        env = MapfEnv(grid, 1, agents_starts, agents_goals, 0, 0, -1000, 0, -1)

        dijkstra_func = dijkstra_min_heuristic(env)
        vi_policy = value_iteration(1.0, env, {})

        for i in range(env.nS):
            self.assertEqual(dijkstra_func(i), vi_policy.v[i])
Example #14
    def test_dijkstra_large_goal_reward(self):
        grid = MapfGrid([
            '..@..',
            '..@..',
            '.....',
        ])

        agents_starts = ((0, 0), )
        agents_goals = ((0, 4), )

        env = MapfEnv(grid, 1, agents_starts, agents_goals, 0, 0, -1000, 100,
                      -1)

        dijkstra_func = dijkstra_min_heuristic(env)
        vi_policy = value_iteration(1.0, env, {})

        for i in range(env.nS):
            self.assertEqual(dijkstra_func(i), vi_policy.v[i])
Example #15
def deterministic_relaxation_prioritized_value_iteration_heuristic(gamma: float,
                                                                   env: MapfEnv) -> Callable[[int], float]:
    deterministic_env = MapfEnv(env.grid,
                                env.n_agents,
                                env.agents_starts,
                                env.agents_goals,
                                0,
                                0,
                                env.reward_of_clash,
                                env.reward_of_goal,
                                env.reward_of_living)
    # TODO: consider using RTDP instead of PVI here, this is theoretically bad but practically may give better results
    policy = prioritized_value_iteration(gamma, deterministic_env, {})

    def heuristic_function(s):
        return policy.v[s]

    return heuristic_function
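
A minimal sketch of consuming this heuristic, assuming env is an existing MapfEnv; the gamma value is illustrative.

# Build the heuristic from the deterministic relaxation and query the value
# estimate of the current joint state.
heuristic = deterministic_relaxation_prioritized_value_iteration_heuristic(1.0, env)
initial_state_estimate = heuristic(env.s)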
Example #16
    def test_detect_conflict_finds_classical_conflict(self):
        grid = MapfGrid(['...', '@.@', '...'])

        agents_starts = ((0, 0), (0, 2))
        agents_goals = ((2, 0), (2, 2))

        env = MapfEnv(grid, 2, agents_starts, agents_goals, 0, 0, -1, 1, -0.01)

        policy1 = {
            0: ACTIONS.index(RIGHT),
            1: ACTIONS.index(STAY),
            2: ACTIONS.index(DOWN),
            3: ACTIONS.index(DOWN),
            4: ACTIONS.index(LEFT),
            5: ACTIONS.index(RIGHT),
            6: ACTIONS.index(LEFT),
        }

        policy2 = {
            0: ACTIONS.index(RIGHT),
            1: ACTIONS.index(RIGHT),
            2: ACTIONS.index(DOWN),
            3: ACTIONS.index(DOWN),
            4: ACTIONS.index(RIGHT),
            5: ACTIONS.index(LEFT),
            6: ACTIONS.index(STAY),
        }

        joint_policy = CrossedPolicy(env, [
            DictPolicy(get_local_view(env, [0]), 1.0, policy1),
            DictPolicy(get_local_view(env, [1]), 1.0, policy2)
        ], [[0], [1]])

        aux_local_env = get_local_view(env, [0])

        self.assertEqual(
            detect_conflict(env, joint_policy),
            ((0, aux_local_env.locations_to_state(
                ((0, 0), )), aux_local_env.locations_to_state(((0, 1), ))),
             (1, aux_local_env.locations_to_state(
                 ((0, 2), )), aux_local_env.locations_to_state(((0, 1), )))))
Example #17
    def test_detect_conflict_return_none_when_no_conflict(self):
        grid = MapfGrid(['...', '...', '...'])

        agents_starts = ((0, 0), (0, 2))
        agents_goals = ((2, 0), (2, 2))

        env = MapfEnv(grid, 2, agents_starts, agents_goals, 0, 0, -1, 1, -0.01)

        policy1 = {
            0: ACTIONS.index(DOWN),
            1: ACTIONS.index(DOWN),
            2: ACTIONS.index(DOWN),
            3: ACTIONS.index(DOWN),
            4: ACTIONS.index(DOWN),
            5: ACTIONS.index(DOWN),
            6: ACTIONS.index(DOWN),
            7: ACTIONS.index(DOWN),
            8: ACTIONS.index(DOWN),
        }

        policy2 = {
            0: ACTIONS.index(DOWN),
            1: ACTIONS.index(DOWN),
            2: ACTIONS.index(DOWN),
            3: ACTIONS.index(DOWN),
            4: ACTIONS.index(DOWN),
            5: ACTIONS.index(DOWN),
            6: ACTIONS.index(DOWN),
            7: ACTIONS.index(DOWN),
            8: ACTIONS.index(DOWN),
        }

        joint_policy = CrossedPolicy(env, [
            DictPolicy(get_local_view(env, [0]), 1.0, policy1),
            DictPolicy(get_local_view(env, [1]), 1.0, policy2)
        ], [[0], [1]])

        self.assertEqual(detect_conflict(env, joint_policy), None)
Example #18
    def test_action_from_terminal_state_has_no_effect(self):
        grid = MapfGrid(['..', '..'])
        env = MapfEnv(grid, 1, ((0, 0), ), ((1, 1), ), 0, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        state, reward, done, _ = env.step(vector_action_to_integer((RIGHT, )))
        self.assertEqual(reward, REWARD_OF_LIVING)
        self.assertEqual(done, False)
        state, reward, done, _ = env.step(vector_action_to_integer((DOWN, )))
        self.assertEqual(reward, REWARD_OF_LIVING + REWARD_OF_GOAL)
        self.assertEqual(done, True)
        # Now, after the game is finished, do another step and make sure it has no effect.
        state_after_done, reward_after_done, done_after_done, _ = env.step(
            vector_action_to_integer((UP, )))
        self.assertEqual(state_after_done, state)
        self.assertEqual(done_after_done, True)
        self.assertEqual(reward_after_done, 0)
        # And once more, as if still trying to reach the goal.
        state_after_done, reward_after_done, done_after_done, _ = env.step(
            vector_action_to_integer((DOWN, )))
        self.assertEqual(state_after_done, state)
        self.assertEqual(done_after_done, True)
        self.assertEqual(reward_after_done, 0)
Example #19
    long_ma_rtdp_min_dijkstra_describer,
    id_rtdp_describer,
]

strong_tested_solvers = [
    long_rtdp_stop_no_improvement_sum_heuristic_describer,
    long_ma_rtdp_sum_pvi_describer, long_id_rtdp_sum_pvi_describer,
    long_rtdp_stop_no_improvement_min_dijkstra_heuristic_describer,
    long_rtdp_stop_no_improvement_sum_dijkstra_heuristic_describer,
    long_ma_rtdp_sum_dijkstra_describer
]

all_tested_solvers = weak_tested_solvers + mid_tested_solvers + strong_tested_solvers

easy_envs = [
    (MapfEnv(MapfGrid(['.' * 8] * 8), 1, ((7, 0), ), ((0, 7), ), 0.1, 0.1,
             -1000, 0, -1), 'empty_grid_single_agent'),
    (MapfEnv(MapfGrid(['..@...', '..@...', '......', '..@...',
                       '..@...']), 2, ((2, 0), (2, 5)), ((2, 5), (2, 0)), 0, 0,
             -0.001, 0, -1), 'symmetrical bottle-neck deterministic'),
    (MapfEnv(MapfGrid(['..@...', '..@...', '......', '..@...',
                       '..@...']), 2, ((2, 0), (2, 5)), ((2, 5), (2, 0)), 0, 0,
             -0.001, 100,
             -1), 'symmetrical bottle-neck deterministic large goal reward'),
    (MapfEnv(MapfGrid(['..@...', '..@...', '......', '..@...',
                       '..@...']), 2, ((2, 0), (2, 5)), ((2, 5), (2, 0)), 0.1,
             0.1, -0.001, 0, -1), 'symmetrical bottle-neck stochastic'),
    (MapfEnv(MapfGrid(['..@..', '..@..', '.....', '..@..',
                       '..@..']), 2, ((2, 0), (2, 4)), ((2, 4), (2, 0)), 0, 0,
             -0.001, 0, -1), 'Asymmetrical bottle-neck deterministic'),
    (MapfEnv(MapfGrid(['..@..', '..@..', '.....', '..@..',
                       '..@..']), 2, ((2, 0), (2, 4)), ((2, 4), (2, 0)), 0, 0,
Example #20
def lrtdp(
    heuristic_function: Callable[[MapfEnv], Callable[[int], float]],
    max_iterations: int,
    gamma: float,
    epsilon: float,
    env: MapfEnv,
    info: Dict,
) -> Policy:
    info['iterations'] = []

    # initialize V to an upper bound
    env.reset()
    initial_state = env.s
    policy = LrtdpPolicy(env, gamma, heuristic_function(env))

    # follow the greedy policy; for each transition, do a Bellman update on V
    n_iterations = 0
    while initial_state not in policy.solved and n_iterations < max_iterations:
        n_iterations += 1
        s = env.s
        start = time.time()
        path = []

        # LRTDP Trial
        while s not in policy.solved:
            # env.render()
            a = greedy_action(env, s, policy.v, gamma)
            path.append((s, a))
            # print(f'action {integer_action_to_vector(a, env.n_agents)} chosen')
            # time.sleep(1)
            new_v_s = sum([
                prob * (reward + gamma * policy.v[next_state])
                for prob, next_state, reward, done in env.P[s][a]
            ])
            policy.v_partial_table[s] = new_v_s

            # simulate the step and sample a new state
            s, r, done, _ = env.step(a)
            if done:
                # append the terminal state to the path; the action does not matter
                path.append((s, 0))
                break

        # iteration finished
        while path:
            state, action = path.pop()
            if not check_solved(policy, state, epsilon):
                break

        info['iterations'].append({
            'n_moves': len(path),
            'time': round(time.time() - start, 2),
            'n_states_solved': len(policy.solved),
            'final_reward': r,
        })

        env.reset()

    env.reset()

    return policy
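
A hedged invocation sketch, assuming env is an existing MapfEnv; pairing lrtdp with dijkstra_min_heuristic (used in the dijkstra tests above) and the iteration/epsilon budget are illustrative choices.

# dijkstra_min_heuristic takes an env and returns a state-value function, matching
# the Callable[[MapfEnv], Callable[[int], float]] signature lrtdp expects.
info = {}
policy = lrtdp(dijkstra_min_heuristic, 1000, 1.0, 0.01, env, info)
print(f"performed {len(info['iterations'])} LRTDP trials")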
Example #21
def manhattan_distance(env: MapfEnv, s, a1, a2):
    """Return the manhattan distance between the two given agents in the given state"""
    locations = env.state_to_locations(s)
    return abs(locations[a1][0] - locations[a2][0]) + abs(locations[a1][1] -
                                                          locations[a2][1])
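
A worked example, assuming env is a two-agent MapfEnv whose grid contains both locations below (illustrative values).

# Agent 0 at (0, 0) and agent 1 at (2, 3): |0 - 2| + |0 - 3| = 5.
s = env.locations_to_state(((0, 0), (2, 3)))
assert manhattan_distance(env, s, 0, 1) == 5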
Example #22
    def test_couple_detect_conflict_3_agents_multiple_agents_in_group(self):
        """This test may sometime be used to test detecting a conflict for only a couple of agents.

            The test will make sure that agent 0 got no conflicts with 1 and 2 while agents 1 and 2 do get a conflict.
            Now agent 1 will be a part of a group contains both agent 0 and 1 ([0,1]). This way agent 1 index in its
            group will be 1 and not 0. This case is catching a bug I had previously.
        """
        grid = MapfGrid(['...', '...', '...'])

        agents_starts = ((0, 0), (2, 0), (2, 2))
        agents_goals = ((0, 2), (2, 2), (2, 0))
        env = MapfEnv(grid, 3, agents_starts, agents_goals, 0, 0, -1, 1, -0.01)
        single_agent_env = MapfEnv(grid, 1, (agents_starts[0], ),
                                   (agents_goals[0], ), 0, 0, -1, 1, -0.01)
        env01 = get_local_view(env, [0, 1])

        # >>S
        # SSS
        # SSS
        policy0 = {
            0: ACTIONS.index(RIGHT),
            1: ACTIONS.index(STAY),
            2: ACTIONS.index(STAY),
            3: ACTIONS.index(RIGHT),
            4: ACTIONS.index(STAY),
            5: ACTIONS.index(STAY),
            6: ACTIONS.index(STAY),
            7: ACTIONS.index(STAY),
            8: ACTIONS.index(STAY),
        }

        # SSS
        # SSS
        # >>S
        policy1 = {
            0: ACTIONS.index(STAY),
            1: ACTIONS.index(STAY),
            2: ACTIONS.index(RIGHT),
            3: ACTIONS.index(STAY),
            4: ACTIONS.index(STAY),
            5: ACTIONS.index(RIGHT),
            6: ACTIONS.index(STAY),
            7: ACTIONS.index(STAY),
            8: ACTIONS.index(STAY),
        }

        # policy01 is a cross between agent 0 and agent 1
        policy01 = {}
        for s0 in range(9):
            for s1 in range(9):
                joint_state = env01.locations_to_state(
                    (single_agent_env.state_to_locations(s0)[0],
                     single_agent_env.state_to_locations(s1)[0]))
                policy01[joint_state] = vector_action_to_integer(
                    (integer_action_to_vector(policy0[s0], 1)[0],
                     integer_action_to_vector(policy1[s1], 1)[0]))

        # SSS
        # SSS
        # S<<
        policy2 = {
            0: ACTIONS.index(STAY),
            1: ACTIONS.index(STAY),
            2: ACTIONS.index(STAY),
            3: ACTIONS.index(STAY),
            4: ACTIONS.index(STAY),
            5: ACTIONS.index(LEFT),
            6: ACTIONS.index(STAY),
            7: ACTIONS.index(STAY),
            8: ACTIONS.index(LEFT),
        }

        joint_policy = CrossedPolicy(env, [
            DictPolicy(env01, 1.0, policy01),
            DictPolicy(get_local_view(env, [2]), 1.0, policy2)
        ], [[0, 1], [2]])

        aux_local_env = get_local_view(env, [0])

        # Assert a conflict is found for agents 1 and 2
        self.assertEqual(
            couple_detect_conflict(env, joint_policy, 2, 1),
            ((2, aux_local_env.locations_to_state(
                ((2, 2), )), aux_local_env.locations_to_state(((2, 1), ))),
             (1, aux_local_env.locations_to_state(
                 ((2, 0), )), aux_local_env.locations_to_state(((2, 1), )))))

        # Assert no conflict is found for agents 0 and 1
        self.assertIsNone(couple_detect_conflict(env, joint_policy, 0, 1))

        # Assert no conflict is found for agents 0 and 2
        self.assertIsNone(couple_detect_conflict(env, joint_policy, 0, 2))
Example #23
    def test_transition_function_empty_grid(self):
        """Assert the basic steps are done right.

        * Define an empty 8x8 environment with two agents starting at (0,0),(7,7) and aiming to reach (0,2),(5,7).
        * Perform one (RIGHT, UP) step and assert that the transitions are correct.
        * Perform another (RIGHT, UP) step from the most probable next state from before ((0,1), (6,7)) and assert
            that the transitions are correct again, including the terminal one.
        """
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))
        grid = MapfGrid(parse_map_file(map_file_path))

        # agents are starting at (0, 0) and (7, 7)
        agent_starts = ((0, 0), (7, 7))
        agents_goals = ((0, 2), (5, 7))

        env = MapfEnv(grid, 2, agent_starts, agents_goals, FAIL_PROB,
                      REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        first_step_transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[env.s][vector_action_to_integer((RIGHT, UP))]
        ]

        self.assertEqual(
            set(first_step_transitions),
            {
                ((0.64, False), env.locations_to_state(
                    ((0, 1), (6, 7))), REWARD_OF_LIVING, False),  # (RIGHT, UP)
                ((0.08, False), env.locations_to_state(
                    ((1, 0), (6, 7))), REWARD_OF_LIVING, False),  # (DOWN, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 0), (6, 7))), REWARD_OF_LIVING, False),  # (UP, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 1),
                     (7, 7))), REWARD_OF_LIVING, False),  # (RIGHT, RIGHT)
                ((0.08, False), env.locations_to_state(
                    ((0, 1),
                     (7, 6))), REWARD_OF_LIVING, False),  # (RIGHT, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((1, 0),
                     (7, 7))), REWARD_OF_LIVING, False),  # (DOWN, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((1, 0),
                     (7, 6))), REWARD_OF_LIVING, False),  # (DOWN, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((0, 0), (7, 7))), REWARD_OF_LIVING, False),  # (UP, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((0, 0), (7, 6))), REWARD_OF_LIVING, False)  # (UP, LEFT)
            })

        wish_state = env.locations_to_state(((0, 1), (6, 7)))
        second_step_transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[wish_state][vector_action_to_integer((RIGHT,
                                                                      UP))]
        ]

        # [(0,0), (7,7)]
        self.assertEqual(
            set(second_step_transitions),
            {
                ((0.64, False), env.locations_to_state(
                    ((0, 2),
                     (5, 7))), REWARD_OF_LIVING + REWARD_OF_GOAL, True),
                # (RIGHT, UP)
                ((0.08, False), env.locations_to_state(
                    ((1, 1), (5, 7))), REWARD_OF_LIVING, False),  # (DOWN, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 1), (5, 7))), REWARD_OF_LIVING, False),  # (UP, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 2),
                     (6, 7))), REWARD_OF_LIVING, False),  # (RIGHT, RIGHT)
                ((0.08, False), env.locations_to_state(
                    ((0, 2),
                     (6, 6))), REWARD_OF_LIVING, False),  # (RIGHT, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((1, 1),
                     (6, 7))), REWARD_OF_LIVING, False),  # (DOWN, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((1, 1),
                     (6, 6))), REWARD_OF_LIVING, False),  # (DOWN, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((0, 1), (6, 7))), REWARD_OF_LIVING, False),  # (UP, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((0, 1), (6, 6))), REWARD_OF_LIVING, False)  # (UP, LEFT)
            })
Example #24
    def test_couple_detect_conflict_3_agents(self):
        """This test may sometime be used to test detecting a conflict for only a couple of agents.

        The test will make sure that agent 0 got no conflicts with 1 and 2 while agents 1 and 2 do get a conflict.
        """
        grid = MapfGrid(['...', '...', '...'])

        agents_starts = ((0, 0), (2, 0), (2, 2))
        agents_goals = ((0, 2), (2, 2), (2, 0))
        env = MapfEnv(grid, 3, agents_starts, agents_goals, 0, 0, -1, 1, -0.01)

        # >>S
        # SSS
        # SSS
        policy0 = {
            0: ACTIONS.index(RIGHT),
            1: ACTIONS.index(STAY),
            2: ACTIONS.index(STAY),
            3: ACTIONS.index(RIGHT),
            4: ACTIONS.index(STAY),
            5: ACTIONS.index(STAY),
            6: ACTIONS.index(STAY),
            7: ACTIONS.index(STAY),
            8: ACTIONS.index(STAY),
        }

        # SSS
        # SSS
        # >>S
        policy1 = {
            0: ACTIONS.index(STAY),
            1: ACTIONS.index(STAY),
            2: ACTIONS.index(RIGHT),
            3: ACTIONS.index(STAY),
            4: ACTIONS.index(STAY),
            5: ACTIONS.index(RIGHT),
            6: ACTIONS.index(STAY),
            7: ACTIONS.index(STAY),
            8: ACTIONS.index(STAY),
        }

        # SSS
        # SSS
        # S<<
        policy2 = {
            0: ACTIONS.index(STAY),
            1: ACTIONS.index(STAY),
            2: ACTIONS.index(STAY),
            3: ACTIONS.index(STAY),
            4: ACTIONS.index(STAY),
            5: ACTIONS.index(LEFT),
            6: ACTIONS.index(STAY),
            7: ACTIONS.index(STAY),
            8: ACTIONS.index(LEFT),
        }

        joint_policy = CrossedPolicy(env, [
            DictPolicy(get_local_view(env, [0]), 1.0, policy0),
            DictPolicy(get_local_view(env, [1]), 1.0, policy1),
            DictPolicy(get_local_view(env, [2]), 1.0, policy2)
        ], [[0], [1], [2]])

        aux_local_env = get_local_view(env, [0])

        # Assert a conflict is found for agents 1 and 2
        self.assertEqual(
            couple_detect_conflict(env, joint_policy, 1, 2),
            ((1, aux_local_env.locations_to_state(
                ((2, 0), )), aux_local_env.locations_to_state(((2, 1), ))),
             (2, aux_local_env.locations_to_state(
                 ((2, 2), )), aux_local_env.locations_to_state(((2, 1), )))))

        # Assert no conflict is found for agents 0 and 1
        self.assertIsNone(couple_detect_conflict(env, joint_policy, 0, 1))

        # Assert no conflict is found for agents 0 and 2
        self.assertIsNone(couple_detect_conflict(env, joint_policy, 0, 2))
Example #25
    def test_predecessors(self):
        """Assert the predecessors function works correctly.

        Create an environment which looks like this:
        ....
        ..0.
        .1..

        3X4 grid.
        agent 0 is at (1,2)
        agent 1 is at (2,1)

        The predecessors for agent 0 are:
        1. (0,2)
        2. (1,1)
        3. (1,3)
        4. (2,2)

        The predecessors for agent 1 are:
        1. (2,2)
        2. (2,0)
        3. (1,1)

        Since STAY is also a possible move, each agent's own location counts as a predecessor as well.
        Therefore, the predecessor states of the initial state correspond to these locations:
        1.  ((0,2), (2,2))
        2.  ((0,2), (2,0))
        3.  ((0,2), (1,1))
        4.  ((0,2), (2,1))
        5.  ((1,1), (2,2))
        6.  ((1,1), (2,0))
        7.  ((1,1), (1,1))
        8.  ((1,1), (2,1))
        9.  ((1,3), (2,2))
        10. ((1,3), (2,0))
        11. ((1,3), (1,1))
        12. ((1,3), (2,1))
        13. ((2,2), (2,2))
        14. ((2,2), (2,0))
        15. ((2,2), (1,1))
        16. ((2,2), (2,1))
        17. ((1,2), (2,2))
        18. ((1,2), (2,0))
        19. ((1,2), (1,1))
        20. ((1,2), (2,1))
        """
        grid = MapfGrid(['....', '....', '....'])

        agents_starts = ((1, 2), (2, 1))
        # don't care
        agents_goals = ((0, 0), (2, 3))

        env = MapfEnv(grid, 2, agents_starts, agents_goals, 0, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        expected_locations = [((0, 2), (2, 2)), ((0, 2), (2, 0)),
                              ((0, 2), (1, 1)), ((0, 2), (2, 1)),
                              ((1, 1), (2, 2)), ((1, 1), (2, 0)),
                              ((1, 1), (1, 1)), ((1, 1), (2, 1)),
                              ((1, 3), (2, 2)), ((1, 3), (2, 0)),
                              ((1, 3), (1, 1)), ((1, 3), (2, 1)),
                              ((2, 2), (2, 2)), ((2, 2), (2, 0)),
                              ((2, 2), (1, 1)), ((2, 2), (2, 1)),
                              ((1, 2), (2, 2)), ((1, 2), (2, 0)),
                              ((1, 2), (1, 1)), ((1, 2), (2, 1))]

        expected_states = [
            env.locations_to_state(loc) for loc in expected_locations
        ]

        self.assertSetEqual(set(expected_states), set(env.predecessors(env.s)))