def test_corridor_switch_no_clash_possible(solver_describer: SolverDescriber):
    grid = MapfGrid(['...', '@.@'])
    agents_starts = ((0, 0), (0, 2))
    agents_goals = ((0, 2), (0, 0))

    # These parameters make sure the solver avoids collisions regardless of reward efficiency
    env = MapfEnv(grid, 2, agents_starts, agents_goals, 0.1, 0.1, -0.001, 0,
                  -1)

    info = {}
    policy = solver_describer.func(env, info)

    # Assert no conflict is possible
    interesting_state = env.locations_to_state(((1, 1), (0, 1)))
    expected_possible_actions = [
        vector_action_to_integer((STAY, UP)),
        vector_action_to_integer((DOWN, UP))
    ]

    assert policy.act(interesting_state) in expected_possible_actions

    # Check the policy performance
    reward, clashed, _ = evaluate_policy(policy, 100, 100)

    # Make sure no clash happened
    assert not clashed

    # Assert the reward is reasonable
    assert reward >= 100.0 * env.reward_of_living
Example #2
    def test_copy_mapf_env(self):
        grid = MapfGrid(['....', '....', '....', '....', '....'])
        env = MapfEnv(grid, 1, ((0, 0), ), ((4, 0), ), 0, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        env.step(vector_action_to_integer((RIGHT, )))

        env_copy = copy(env)
        env_copy.step(vector_action_to_integer((RIGHT, )))
Example #3
    def test_reward_single_agent_makespan(self):
        grid = MapfGrid(['....', '....', '....', '....', '....'])

        start_locations = ((0, 0), )
        goal_locations = ((4, 0), )

        determinstic_env = MapfEnv(grid, 1, start_locations, goal_locations, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING,
                                   OptimizationCriteria.Makespan)
        total_reward = 0
        down_action = vector_action_to_integer((DOWN, ))
        _, r, _, _ = determinstic_env.step(down_action)
        total_reward += r
        _, r, _, _ = determinstic_env.step(down_action)
        total_reward += r
        _, r, _, _ = determinstic_env.step(down_action)
        total_reward += r
        s, r, done, _ = determinstic_env.step(down_action)
        total_reward += r

        self.assertEqual(s,
                         determinstic_env.locations_to_state(goal_locations))
        self.assertEqual(r, REWARD_OF_LIVING + REWARD_OF_GOAL)

        self.assertEqual(total_reward, REWARD_OF_GOAL + 4 * REWARD_OF_LIVING)
Example #4
    def test_similar_transitions_probability_summed(self):
        grid = MapfGrid(['..', '..'])
        env = MapfEnv(grid, 1, ((0, 0), ), ((1, 1), ), 0.1, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        a = vector_action_to_integer((STAY, STAY))
        self.assertEqual(env.P[env.s][a],
                         [((1, False), env.s, REWARD_OF_LIVING, False)])
Example #5
    def test_reward_multiagent_soc_stay_actions(self):
        grid = MapfGrid(['....', '....', '....', '....'])

        start_locations = ((0, 0), (3, 3), (1, 1))
        goal_locations = ((0, 1), (1, 3), (1, 2))

        determinstic_env = MapfEnv(grid, 3, start_locations, goal_locations, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING, OptimizationCriteria.SoC)

        right_stay_stay = vector_action_to_integer((RIGHT, STAY, STAY))
        s, r, done, _ = determinstic_env.step(right_stay_stay)
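        # Under the SoC criterion the living cost is summed over agents: with REWARD_OF_LIVING
        # apparently equal to -1 in this test module, three agents yield a reward of -3.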
        self.assertEqual(r, -3)
Example #6
    def test_reward_multiagent_makespan(self):
        grid = MapfGrid(['....', '....', '....', '....'])

        start_locations = ((0, 0), (3, 3), (1, 1))
        goal_locations = ((0, 1), (1, 3), (1, 2))

        determinstic_env = MapfEnv(grid, 3, start_locations, goal_locations, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING,
                                   OptimizationCriteria.Makespan)

        total_reward = 0
        right_up_right = vector_action_to_integer((RIGHT, UP, RIGHT))
        s, r, done, _ = determinstic_env.step(right_up_right)
        total_reward += r
        self.assertFalse(done)

        stay_up_stay = vector_action_to_integer((STAY, UP, STAY))
        s, r, done, _ = determinstic_env.step(stay_up_stay)
        total_reward += r
        self.assertEqual(s,
                         determinstic_env.locations_to_state(goal_locations))
        self.assertTrue(done)
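        # Under the Makespan criterion the living cost is charged once per time step
        # (two steps here), not once per agent.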
        self.assertEqual(total_reward, 2 * REWARD_OF_LIVING + REWARD_OF_GOAL)
Example #7
    def test_action_from_terminal_state_has_no_effect(self):
        grid = MapfGrid(['..', '..'])
        env = MapfEnv(grid, 1, ((0, 0), ), ((1, 1), ), 0, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        state, reward, done, _ = env.step(vector_action_to_integer((RIGHT, )))
        self.assertEqual(reward, REWARD_OF_LIVING)
        self.assertEqual(done, False)
        state, reward, done, _ = env.step(vector_action_to_integer((DOWN, )))
        self.assertEqual(reward, REWARD_OF_LIVING + REWARD_OF_GOAL)
        self.assertEqual(done, True)
        # Now that the game is finished, do another step and make sure it has no effect.
        state_after_done, reward_after_done, done_after_done, _ = env.step(
            vector_action_to_integer((UP, )))
        self.assertEqual(state_after_done, state)
        self.assertEqual(done_after_done, True)
        self.assertEqual(reward_after_done, 0)
        # Step once more, as if trying to reach the goal again
        state_after_done, reward_after_done, done_after_done, _ = env.step(
            vector_action_to_integer((DOWN, )))
        self.assertEqual(state_after_done, state)
        self.assertEqual(done_after_done, True)
        self.assertEqual(reward_after_done, 0)
Example #8
    def act(self, joint_state):
        if joint_state in self.policy_cache:
            return self.policy_cache[joint_state]

        joint_action = ()
        forbidden_states = set()
        for agent in range(self.env.n_agents):
            # TODO: the problem is that the best response is according to joint state even though we are in state s.
            # TODO: we shouldn't actually step in this part...
            local_action = best_response(self, joint_state, agent,
                                         forbidden_states, False)
            joint_action = joint_action + (ACTIONS[local_action], )

        best_action = vector_action_to_integer(joint_action)
        self.policy_cache[joint_state] = best_action
        return best_action
Example #9
    def test_switch_spots_is_a_collision(self):
        grid = MapfGrid(['..'])

        agents_starts = (
            (0, 0),
            (0, 1),
        )
        agents_goals = ((0, 1), (0, 0))

        determinstic_env = MapfEnv(grid, 2, agents_starts, agents_goals, 0,
                                   REWARD_OF_CLASH, REWARD_OF_GOAL,
                                   REWARD_OF_LIVING,
                                   OptimizationCriteria.Makespan)

        s, r, done, _ = determinstic_env.step(
            vector_action_to_integer((RIGHT, LEFT)))

        # Assert the game terminated in a collision
        self.assertEqual(done, True)
        self.assertEqual(r, REWARD_OF_LIVING + REWARD_OF_CLASH)
Example #10
    def test_roni_scenario_with_id(self):
        # TODO: this test only passes when the first action in the ACTIONS array is STAY,
        #  fix it to work without relying on that
        grid = MapfGrid(['.@.', '.@.', '...'])
        agents_starts = ((0, 0), (0, 2))
        agents_goals = ((2, 0), (2, 2))

        env = MapfEnv(grid, 2, agents_starts, agents_goals, 0.1, 0.01, -1, 1,
                      -0.1)

        independent_joint_policy = solve_independently_and_cross(
            env, [[0], [1]], partial(value_iteration, 1.0), {})

        interesting_state = env.locations_to_state(((0, 0), (0, 2)))

        # Assert independent_joint_policy just chooses the most efficient action
        self.assertEqual(independent_joint_policy.act(interesting_state),
                         vector_action_to_integer((DOWN, DOWN)))

        # Assert no conflict
        self.assertEqual(detect_conflict(env, independent_joint_policy), None)
Example #11
    def test_colliding_agents_state_is_terminal_and_negative_reward(self):
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))

        grid = MapfGrid(parse_map_file(map_file_path))

        # agents' start and goal locations
        agent_starts = ((0, 0), (0, 2))
        agents_goals = ((7, 7), (5, 5))

        env = MapfEnv(grid, 2, agent_starts, agents_goals, FAIL_PROB,
                      REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)
        transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[env.s][vector_action_to_integer((RIGHT, LEFT))]
        ]

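        # 0.64 is apparently 0.8 * 0.8: both agents make their intended moves (RIGHT and LEFT)
        # and collide in (0, 1), which terminates the episode with the clash penalty.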
        self.assertIn(((0.64, True), env.locations_to_state(
            ((0, 1), (0, 1))), REWARD_OF_LIVING + REWARD_OF_CLASH, True),
                      set(transitions))
Example #12
    def get_q(self, agent, joint_state, local_action):
        if joint_state in self.q_partial_table[agent]:
            if local_action in self.q_partial_table[agent][joint_state]:
                return self.q_partial_table[agent][joint_state][local_action]

        # Build the joint action in which only this agent acts and every other agent STAYs
        all_stay = (STAY, ) * self.env.n_agents
        joint_action_vector = all_stay[:agent] + (
            ACTIONS[local_action], ) + all_stay[agent + 1:]
        joint_action = vector_action_to_integer(joint_action_vector)

        # Compute Q[s][a]. If any possible transition ends in a clash, force the value to -infinity
        q_value = 0
        for prob, next_state, reward, done in self.env.P[joint_state][
                joint_action]:
            if reward == self.env.reward_of_clash and done:
                q_value = -math.inf

            q_value += prob * (reward + (self.gamma * self.v[next_state]))

        self.q_partial_table[agent][joint_state][local_action] = q_value

        return self.q_partial_table[agent][joint_state][local_action]
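The loop above is a standard expected Bellman backup, Q(s, a) = sum over transitions of
prob * (reward + gamma * V(next_state)), with any transition that ends in a clash forcing
the value to minus infinity. A hypothetical caller (greedy_local_action is illustrative and
not part of the library; ACTIONS is assumed to be the global action tuple used throughout
these examples) could pick a greedy local action for one agent like this:

def greedy_local_action(policy, agent, joint_state):
    # Hypothetical helper: pick the local action with the highest Q-value for this agent,
    # reusing the partial Q-table that get_q above fills lazily.
    q_values = {a: policy.get_q(agent, joint_state, a) for a in range(len(ACTIONS))}
    return max(q_values, key=q_values.get)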
Example #13
    def test_vector_action_to_integer(self):
        self.assertEqual((DOWN, UP),
                         integer_action_to_vector(vector_action_to_integer((DOWN, UP)), 2))
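The round-trip asserted above is consistent with a simple mixed-radix encoding over the
ACTIONS tuple. The sketch below is only an illustration of such a scheme; the constants,
their ordering, and the function names here are placeholders, not the library's actual
definitions:

# Placeholder action constants and ordering, for the sketch only.
STAY, UP, RIGHT, DOWN, LEFT = 'S', 'U', 'R', 'D', 'L'
ACTIONS = (STAY, UP, RIGHT, DOWN, LEFT)

def vector_action_to_integer_sketch(action_vector):
    # Encode a joint action as a base-len(ACTIONS) integer, agent 0 in the least significant digit.
    integer = 0
    for local_action in reversed(action_vector):
        integer = integer * len(ACTIONS) + ACTIONS.index(local_action)
    return integer

def integer_action_to_vector_sketch(integer, n_agents):
    # Inverse of the encoding above: peel off one digit per agent.
    vector = ()
    for _ in range(n_agents):
        vector = vector + (ACTIONS[integer % len(ACTIONS)], )
        integer //= len(ACTIONS)
    return vector

assert integer_action_to_vector_sketch(vector_action_to_integer_sketch((DOWN, UP)), 2) == (DOWN, UP)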
Example #14
def multi_agent_turn_based_rtdp_single_iteration(policy: MultiagentRtdpPolicy,
                                                 info: Dict):
    s = policy.env.reset()
    done = False
    start = time.time()
    path = []
    total_reward = 0

    # # debug
    # print('--------start iteration---------------')

    steps = 0
    while not done and steps < 1000:
        steps += 1
        trajectory_actions = []
        forbidden_states = set()
        joint_action_vector = (STAY, ) * policy.env.n_agents

        # Calculate local action
        for agent in range(policy.env.n_agents):
            local_action = best_response(policy, s, agent, forbidden_states,
                                         False)
            trajectory_actions.append(local_action)
            joint_action_vector = joint_action_vector[:agent] + (
                ACTIONS[local_action], ) + joint_action_vector[agent + 1:]

        # # debug
        # policy.env.render()
        # print(f'selected action: {joint_action_vector}')
        # time.sleep(0.2)

        # Compose the joint action
        joint_action = vector_action_to_integer(joint_action_vector)
        path.append((s, joint_action))

        # Update the value and Q estimates based on the current state
        for agent in reversed(range(policy.env.n_agents)):
            # update q(s, agent, action) based on the last state
            policy.v_update(s)
            policy.q_update(agent, s, trajectory_actions[agent], joint_action)

        policy.visited_states[s] = policy.visited_states[s] + 1

        # step
        s, r, done, _ = policy.env.step(joint_action)
        total_reward += r

    # # debug
    # policy.env.render()

    # # Backward update
    # while path:
    #     s, joint_action = path.pop()
    #     policy.v_update(s)
    #     joint_action_vector = integer_action_to_vector(joint_action, policy.env.n_agents)
    #     for agent in reversed(range(policy.env.n_agents)):
    #         local_action = vector_action_to_integer((joint_action_vector[agent],))
    #         policy.q_update(agent, s, local_action, joint_action)

    # # debug
    # print('--------end iteration---------------')

    return total_reward
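A single call above performs one trial-based sweep of RTDP. A hypothetical driver (run_rtdp
and the iteration count are illustrative, not the library's actual API) would simply call it
repeatedly and collect the return of each iteration:

def run_rtdp(policy, info, n_iterations=500):
    # Hypothetical outer loop: run single iterations and track the return of each one.
    returns = []
    for _ in range(n_iterations):
        returns.append(multi_agent_turn_based_rtdp_single_iteration(policy, info))
    return returns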
Example #15
    def test_couple_detect_conflict_3_agents_multiple_agents_in_group(self):
        """This test may sometime be used to test detecting a conflict for only a couple of agents.

            The test will make sure that agent 0 got no conflicts with 1 and 2 while agents 1 and 2 do get a conflict.
            Now agent 1 will be a part of a group contains both agent 0 and 1 ([0,1]). This way agent 1 index in its
            group will be 1 and not 0. This case is catching a bug I had previously.
        """
        grid = MapfGrid(['...', '...', '...'])

        agents_starts = ((0, 0), (2, 0), (2, 2))
        agents_goals = ((0, 2), (2, 2), (2, 0))
        env = MapfEnv(grid, 3, agents_starts, agents_goals, 0, 0, -1, 1, -0.01)
        single_agent_env = MapfEnv(grid, 1, (agents_starts[0], ),
                                   (agents_goals[0], ), 0, 0, -1, 1, -0.01)
        env01 = get_local_view(env, [0, 1])

        # >>S
        # SSS
        # SSS
        policy0 = {
            0: ACTIONS.index(RIGHT),
            1: ACTIONS.index(STAY),
            2: ACTIONS.index(STAY),
            3: ACTIONS.index(RIGHT),
            4: ACTIONS.index(STAY),
            5: ACTIONS.index(STAY),
            6: ACTIONS.index(STAY),
            7: ACTIONS.index(STAY),
            8: ACTIONS.index(STAY),
        }

        # SSS
        # SSS
        # >>S
        policy1 = {
            0: ACTIONS.index(STAY),
            1: ACTIONS.index(STAY),
            2: ACTIONS.index(RIGHT),
            3: ACTIONS.index(STAY),
            4: ACTIONS.index(STAY),
            5: ACTIONS.index(RIGHT),
            6: ACTIONS.index(STAY),
            7: ACTIONS.index(STAY),
            8: ACTIONS.index(STAY),
        }

        # policy01 is a cross between agent 0 and agent 1
        policy01 = {}
        for s0 in range(9):
            for s1 in range(9):
                joint_state = env01.locations_to_state(
                    (single_agent_env.state_to_locations(s0)[0],
                     single_agent_env.state_to_locations(s1)[0]))
                policy01[joint_state] = vector_action_to_integer(
                    (integer_action_to_vector(policy0[s0], 1)[0],
                     integer_action_to_vector(policy1[s1], 1)[0]))

        # SSS
        # SSS
        # S<<
        policy2 = {
            0: ACTIONS.index(STAY),
            1: ACTIONS.index(STAY),
            2: ACTIONS.index(STAY),
            3: ACTIONS.index(STAY),
            4: ACTIONS.index(STAY),
            5: ACTIONS.index(LEFT),
            6: ACTIONS.index(STAY),
            7: ACTIONS.index(STAY),
            8: ACTIONS.index(LEFT),
        }

        joint_policy = CrossedPolicy(env, [
            DictPolicy(env01, 1.0, policy01),
            DictPolicy(get_local_view(env, [2]), 1.0, policy2)
        ], [[0, 1], [2]])

        aux_local_env = get_local_view(env, [0])

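        # Both policies route agents 1 and 2 into cell (2, 1) on their first move: agent 2
        # goes LEFT from (2, 2) and agent 1 goes RIGHT from (2, 0). The expected conflict is
        # therefore a pair of (agent, state-before, state-stepped-into) triples.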
        # Assert a conflict is found for agents 1 and 2
        self.assertEqual(
            couple_detect_conflict(env, joint_policy, 2, 1),
            ((2, aux_local_env.locations_to_state(
                ((2, 2), )), aux_local_env.locations_to_state(((2, 1), ))),
             (1, aux_local_env.locations_to_state(
                 ((2, 0), )), aux_local_env.locations_to_state(((2, 1), )))))

        # Assert no conflict is found for agents 0 and 1
        self.assertIsNone(couple_detect_conflict(env, joint_policy, 0, 1))

        # Assert no conflict is found for agents 0 and 2
        self.assertIsNone(couple_detect_conflict(env, joint_policy, 0, 2))
Example #16
    def test_transition_function_empty_grid(self):
        """Assert the basic steps are done right.

        * Define an empty 8x8 environment with two agents starting at (0,0),(7,7) and aiming to reach (0,2),(5,7).
        * Perform one (RIGHT, UP) step and assert that the transitions are correct.
        * Perform another (RIGHT, UP) step from the most probable next state from before ((0,1), (6,7)) and assert
            that the transitions are correct again, including the terminal one.
        """
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))
        grid = MapfGrid(parse_map_file(map_file_path))

        # agents' start and goal locations
        agent_starts = ((0, 0), (7, 7))
        agents_goals = ((0, 2), (5, 7))

        env = MapfEnv(grid, 2, agent_starts, agents_goals, FAIL_PROB,
                      REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        first_step_transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[env.s][vector_action_to_integer((RIGHT, UP))]
        ]

        self.assertEqual(
            set(first_step_transitions),
            {
                ((0.64, False), env.locations_to_state(
                    ((0, 1), (6, 7))), REWARD_OF_LIVING, False),  # (RIGHT, UP)
                ((0.08, False), env.locations_to_state(
                    ((1, 0), (6, 7))), REWARD_OF_LIVING, False),  # (DOWN, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 0), (6, 7))), REWARD_OF_LIVING, False),  # (UP, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 1),
                     (7, 7))), REWARD_OF_LIVING, False),  # (RIGHT, RIGHT)
                ((0.08, False), env.locations_to_state(
                    ((0, 1),
                     (7, 6))), REWARD_OF_LIVING, False),  # (RIGHT, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((1, 0),
                     (7, 7))), REWARD_OF_LIVING, False),  # (DOWN, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((1, 0),
                     (7, 6))), REWARD_OF_LIVING, False),  # (DOWN, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((0, 0), (7, 7))), REWARD_OF_LIVING, False),  # (UP, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((0, 0), (7, 6))), REWARD_OF_LIVING, False)  # (UP, LEFT)
            })

        wish_state = env.locations_to_state(((0, 1), (6, 7)))
        second_step_transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[wish_state][vector_action_to_integer((RIGHT,
                                                                      UP))]
        ]

        # Transitions from wish_state ((0, 1), (6, 7))
        self.assertEqual(
            set(second_step_transitions),
            {
                ((0.64, False), env.locations_to_state(
                    ((0, 2),
                     (5, 7))), REWARD_OF_LIVING + REWARD_OF_GOAL, True),
                # (RIGHT, UP)
                ((0.08, False), env.locations_to_state(
                    ((1, 1), (5, 7))), REWARD_OF_LIVING, False),  # (DOWN, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 1), (5, 7))), REWARD_OF_LIVING, False),  # (UP, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 2),
                     (6, 7))), REWARD_OF_LIVING, False),  # (RIGHT, RIGHT)
                ((0.08, False), env.locations_to_state(
                    ((0, 2),
                     (6, 6))), REWARD_OF_LIVING, False),  # (RIGHT, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((1, 1),
                     (6, 7))), REWARD_OF_LIVING, False),  # (DOWN, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((1, 1),
                     (6, 6))), REWARD_OF_LIVING, False),  # (DOWN, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((0, 1), (6, 7))), REWARD_OF_LIVING, False),  # (UP, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((0, 1), (6, 6))), REWARD_OF_LIVING, False)  # (UP, LEFT)
            })