Example #1
    def test_reward_single_agent_makespan(self):
        grid = MapfGrid(['....', '....', '....', '....', '....'])

        start_locations = ((0, 0), )
        goal_locations = ((4, 0), )

        deterministic_env = MapfEnv(grid, 1, start_locations, goal_locations, 0,
                                    REWARD_OF_CLASH, REWARD_OF_GOAL,
                                    REWARD_OF_LIVING,
                                    OptimizationCriteria.Makespan)
        total_reward = 0
        down_action = vector_action_to_integer((DOWN, ))
        _, r, _, _ = deterministic_env.step(down_action)
        total_reward += r
        _, r, _, _ = deterministic_env.step(down_action)
        total_reward += r
        _, r, _, _ = deterministic_env.step(down_action)
        total_reward += r
        s, r, done, _ = deterministic_env.step(down_action)
        total_reward += r

        self.assertEqual(s,
                         deterministic_env.locations_to_state(goal_locations))
        self.assertEqual(r, REWARD_OF_LIVING + REWARD_OF_GOAL)

        self.assertEqual(total_reward, REWARD_OF_GOAL + 4 * REWARD_OF_LIVING)
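For reference, the expected total follows directly from the start/goal geometry: four DOWN moves from (0, 0) to (4, 0), each charged REWARD_OF_LIVING, plus REWARD_OF_GOAL on the final transition. A minimal standalone sketch of that arithmetic (the reward constants below are illustrative placeholders, not the suite's values):

# Hedged sketch: derive the expected makespan reward of a deterministic
# single-agent run from the Manhattan distance between start and goal.
REWARD_OF_LIVING = -1   # placeholder value
REWARD_OF_GOAL = 100    # placeholder value

start, goal = (0, 0), (4, 0)
n_steps = abs(goal[0] - start[0]) + abs(goal[1] - start[1])  # = 4

expected_total = n_steps * REWARD_OF_LIVING + REWARD_OF_GOAL
assert expected_total == REWARD_OF_GOAL + 4 * REWARD_OF_LIVING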
Example #2
def test_corridor_switch_no_clash_possible(solver_describer: SolverDescriber):
    grid = MapfGrid(['...', '@.@'])
    agents_starts = ((0, 0), (0, 2))
    agents_goals = ((0, 2), (0, 0))

    # These parameters ensure the solver avoids collisions regardless of reward efficiency
    env = MapfEnv(grid, 2, agents_starts, agents_goals, 0.1, 0.1, -0.001, 0,
                  -1)

    info = {}
    policy = solver_describer.func(env, info)

    # Assert no conflict is possible
    interesting_state = env.locations_to_state(((1, 1), (0, 1)))
    expected_possible_actions = [
        vector_action_to_integer((STAY, UP)),
        vector_action_to_integer((DOWN, UP))
    ]

    assert policy.act(interesting_state) in expected_possible_actions

    # Check the policy performance
    reward, clashed, _ = evaluate_policy(policy, 100, 100)

    # Make sure no clash happened
    assert not clashed

    # Assert the reward is reasonable
    assert reward >= 100.0 * env.reward_of_living
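evaluate_policy(policy, 100, 100) is not shown here; presumably it rolls the policy out for a number of episodes with a per-episode step cap and reports the accumulated reward plus whether any clash occurred. A hedged, self-contained sketch of such a loop (the function name, the 'clashed' info key, and the returned tuple are assumptions for illustration, not the library's API):

def rollout_policy(env, policy, episodes, max_steps):
    """Hedged sketch of a policy-evaluation loop; not the library's evaluate_policy.

    Assumes a gym-style env with reset()/step() and a policy exposing act(state).
    Returns the mean episode reward and whether any clash was observed.
    """
    total_reward, clashed = 0.0, False
    for _ in range(episodes):
        state = env.reset()
        for _ in range(max_steps):
            state, reward, done, info = env.step(policy.act(state))
            total_reward += reward
            clashed = clashed or info.get('clashed', False)  # 'clashed' key is an assumption
            if done:
                break
    return total_reward / episodes, clashed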
Example #3
    def test_roni_scenario_with_id(self):
        # TODO: this test only passes when the first action in the ACTIONS array is STAY;
        #  fix it to work without relying on that.
        grid = MapfGrid(['.@.', '.@.', '...'])
        agents_starts = ((0, 0), (0, 2))
        agents_goals = ((2, 0), (2, 2))

        env = MapfEnv(grid, 2, agents_starts, agents_goals, 0.1, 0.01, -1, 1,
                      -0.1)

        independent_joint_policy = solve_independently_and_cross(
            env, [[0], [1]], partial(value_iteration, 1.0), {})

        interesting_state = env.locations_to_state(((0, 0), (0, 2)))

        # Assert independent_joint_policy just chooses the most efficient action
        self.assertEqual(independent_joint_policy.act(interesting_state),
                         vector_action_to_integer((DOWN, DOWN)))

        # Assert no conflict
        self.assertEqual(detect_conflict(env, independent_joint_policy), None)
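(DOWN, DOWN) is the most efficient joint action here because each agent's goal sits directly below its start in its own column, and the middle column is blocked by '@', so the two shortest paths can never meet. A small standalone check of that geometry:

# Hedged sketch: each agent's goal is straight down its own column, and the
# two columns (0 and 2) are separated by the '@' wall in column 1, so greedy
# DOWN moves are both optimal and conflict-free.
starts = ((0, 0), (0, 2))
goals = ((2, 0), (2, 2))

for (sr, sc), (gr, gc) in zip(starts, goals):
    assert sc == gc and gr > sr  # DOWN strictly reduces each agent's distance

agent0_cols = {starts[0][1], goals[0][1]}
agent1_cols = {starts[1][1], goals[1][1]}
assert agent0_cols.isdisjoint(agent1_cols)  # paths occupy disjoint columns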
Example #4
    def test_colliding_agents_state_is_terminal_and_negative_reward(self):
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))

        grid = MapfGrid(parse_map_file(map_file_path))

        # agents' start and goal locations
        agent_starts = ((0, 0), (0, 2))
        agents_goals = ((7, 7), (5, 5))

        env = MapfEnv(grid, 2, agent_starts, agents_goals, FAIL_PROB,
                      REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)
        transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[env.s][vector_action_to_integer((RIGHT, LEFT))]
        ]

        self.assertIn(((0.64, True), env.locations_to_state(
            ((0, 1), (0, 1))), REWARD_OF_LIVING + REWARD_OF_CLASH, True),
                      set(transitions))
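The 0.64 branch asserted above corresponds to both agents executing (RIGHT, LEFT) as intended and meeting at (0, 1). Assuming each agent independently succeeds with probability 0.8 (consistent with the 0.64/0.08/0.01 branch probabilities used throughout these tests), the joint probability is 0.8 * 0.8. A hedged arithmetic check:

# Hedged arithmetic check: assuming each agent independently executes its
# intended move with probability 0.8.
p_ok = 0.8

# Both agents move as intended under (RIGHT, LEFT) and collide at (0, 1).
p_clash_branch = p_ok * p_ok
assert round(p_clash_branch, 2) == 0.64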
Example #5
    def test_reward_multiagent_makespan(self):
        grid = MapfGrid(['....', '....', '....', '....'])

        start_locations = ((0, 0), (3, 3), (1, 1))
        goal_locations = ((0, 1), (1, 3), (1, 2))

        deterministic_env = MapfEnv(grid, 3, start_locations, goal_locations, 0,
                                    REWARD_OF_CLASH, REWARD_OF_GOAL,
                                    REWARD_OF_LIVING,
                                    OptimizationCriteria.Makespan)

        total_reward = 0
        right_up_right = vector_action_to_integer((RIGHT, UP, RIGHT))
        s, r, done, _ = deterministic_env.step(right_up_right)
        total_reward += r
        self.assertFalse(done)

        stay_up_stay = vector_action_to_integer((STAY, UP, STAY))
        s, r, done, _ = deterministic_env.step(stay_up_stay)
        total_reward += r
        self.assertEqual(s,
                         deterministic_env.locations_to_state(goal_locations))
        self.assertTrue(done)
        self.assertEqual(total_reward, 2 * REWARD_OF_LIVING + REWARD_OF_GOAL)
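Note the accounting implied by the final assertion: under the Makespan criterion the living cost appears to be charged once per joint time step rather than once per agent, so two joint steps with three agents still contribute only 2 * REWARD_OF_LIVING, plus a single REWARD_OF_GOAL when all agents are done. A placeholder-valued check of that arithmetic:

# Hedged check with placeholder constants: two joint time steps, living cost
# charged per step (not per agent) under Makespan, goal reward added once.
REWARD_OF_LIVING = -1   # placeholder value
REWARD_OF_GOAL = 100    # placeholder value
n_joint_steps = 2

expected_total = n_joint_steps * REWARD_OF_LIVING + REWARD_OF_GOAL
assert expected_total == 2 * REWARD_OF_LIVING + REWARD_OF_GOAL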
Example #6
    def test_predecessors(self):
        """Assert the predecessors function works correctly.

        Create an environment which looks like this:
        ....
        ..0.
        .1..

        3x4 grid.
        agent 0 is at (1,2)
        agent 1 is at (2,1)

        The predecessors for agent 0 are:
        1. (0,2)
        2. (1,1)
        3. (1,3)
        4. (2,2)

        The predecessors for agent 1 are:
        1. (2,2)
        2. (2,0)
        3. (1,1)

        Therefore, the predecessor states of the initial state correspond to these locations:
        1.  ((0,2), (2,2))
        2.  ((0,2), (2,0))
        3.  ((0,2), (1,1))
        4.  ((0,2), (2,1))
        5.  ((1,1), (2,2))
        6.  ((1,1), (2,0))
        7.  ((1,1), (1,1))
        8.  ((1,1), (2,1))
        9.  ((1,3), (2,2))
        10. ((1,3), (2,0))
        11. ((1,3), (1,1))
        12. ((1,3), (2,1))
        13. ((2,2), (2,2))
        14. ((2,2), (2,0))
        15. ((2,2), (1,1))
        16. ((2,2), (2,1))
        17. ((1,2), (2,2))
        18. ((1,2), (2,0))
        19. ((1,2), (1,1))
        20. ((1,2), (2,1))
        """
        grid = MapfGrid(['....', '....', '....'])

        agents_starts = ((1, 2), (2, 1))
        # goal locations don't matter for this test
        agents_goals = ((0, 0), (2, 3))

        env = MapfEnv(grid, 2, agents_starts, agents_goals, 0, REWARD_OF_CLASH,
                      REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        expected_locations = [((0, 2), (2, 2)), ((0, 2), (2, 0)),
                              ((0, 2), (1, 1)), ((0, 2), (2, 1)),
                              ((1, 1), (2, 2)), ((1, 1), (2, 0)),
                              ((1, 1), (1, 1)), ((1, 1), (2, 1)),
                              ((1, 3), (2, 2)), ((1, 3), (2, 0)),
                              ((1, 3), (1, 1)), ((1, 3), (2, 1)),
                              ((2, 2), (2, 2)), ((2, 2), (2, 0)),
                              ((2, 2), (1, 1)), ((2, 2), (2, 1)),
                              ((1, 2), (2, 2)), ((1, 2), (2, 0)),
                              ((1, 2), (1, 1)), ((1, 2), (2, 1))]

        expected_states = [
            env.locations_to_state(loc) for loc in expected_locations
        ]

        self.assertSetEqual(set(expected_states), set(env.predecessors(env.s)))
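The twenty expected_locations pairs are simply the Cartesian product of each agent's single-agent predecessor cells listed in the docstring (each agent's own cell is included, since a STAY action keeps it in place). A hedged sketch that reproduces the list:

import itertools

# Hedged sketch: rebuild the 20 expected predecessor location pairs from the
# per-agent predecessor cells given in the docstring above.
agent0_preds = [(0, 2), (1, 1), (1, 3), (2, 2), (1, 2)]
agent1_preds = [(2, 2), (2, 0), (1, 1), (2, 1)]

expected_locations = list(itertools.product(agent0_preds, agent1_preds))
assert len(expected_locations) == 20
assert ((1, 2), (2, 1)) in expected_locations  # both agents STAY in place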
Example #7
    def test_transition_function_empty_grid(self):
        """Assert the basic steps are done right.

        * Define an empty 8x8 environment with two agents starting at (0,0),(7,7) and aiming to reach (0,2),(5,7).
        * Perform one (RIGHT, UP) step and assert that the transitions are correct.
        * Perform another (RIGHT, UP) step from the most probable next state of the previous step, ((0,1), (6,7)), and assert
            that the transitions are correct again, including the terminal one.
        """
        map_file_path = os.path.abspath(
            os.path.join(__file__, MAPS_DIR, 'empty-8-8/empty-8-8.map'))
        grid = MapfGrid(parse_map_file(map_file_path))

        # agents' start and goal locations
        agent_starts = ((0, 0), (7, 7))
        agents_goals = ((0, 2), (5, 7))

        env = MapfEnv(grid, 2, agent_starts, agents_goals, FAIL_PROB,
                      REWARD_OF_CLASH, REWARD_OF_GOAL, REWARD_OF_LIVING,
                      OptimizationCriteria.Makespan)

        first_step_transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[env.s][vector_action_to_integer((RIGHT, UP))]
        ]

        self.assertEqual(
            set(first_step_transitions),
            {
                ((0.64, False), env.locations_to_state(
                    ((0, 1), (6, 7))), REWARD_OF_LIVING, False),  # (RIGHT, UP)
                ((0.08, False), env.locations_to_state(
                    ((1, 0), (6, 7))), REWARD_OF_LIVING, False),  # (DOWN, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 0), (6, 7))), REWARD_OF_LIVING, False),  # (UP, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 1),
                     (7, 7))), REWARD_OF_LIVING, False),  # (RIGHT, RIGHT)
                ((0.08, False), env.locations_to_state(
                    ((0, 1),
                     (7, 6))), REWARD_OF_LIVING, False),  # (RIGHT, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((1, 0),
                     (7, 7))), REWARD_OF_LIVING, False),  # (DOWN, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((1, 0),
                     (7, 6))), REWARD_OF_LIVING, False),  # (DOWN, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((0, 0), (7, 7))), REWARD_OF_LIVING, False),  # (UP, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((0, 0), (7, 6))), REWARD_OF_LIVING, False)  # (UP, LEFT)
            })

        wish_state = env.locations_to_state(((0, 1), (6, 7)))
        second_step_transitions = [
            ((round(prob, 2), collision), next_state, reward, done)
            for ((prob, collision), next_state, reward,
                 done) in env.P[wish_state][vector_action_to_integer((RIGHT,
                                                                      UP))]
        ]

        # transitions from wish_state = ((0, 1), (6, 7)) under (RIGHT, UP)
        self.assertEqual(
            set(second_step_transitions),
            {
                ((0.64, False), env.locations_to_state(
                    ((0, 2),
                     (5, 7))), REWARD_OF_LIVING + REWARD_OF_GOAL, True),
                # (RIGHT, UP)
                ((0.08, False), env.locations_to_state(
                    ((1, 1), (5, 7))), REWARD_OF_LIVING, False),  # (DOWN, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 1), (5, 7))), REWARD_OF_LIVING, False),  # (UP, UP)
                ((0.08, False), env.locations_to_state(
                    ((0, 2),
                     (6, 7))), REWARD_OF_LIVING, False),  # (RIGHT, RIGHT)
                ((0.08, False), env.locations_to_state(
                    ((0, 2),
                     (6, 6))), REWARD_OF_LIVING, False),  # (RIGHT, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((1, 1),
                     (6, 7))), REWARD_OF_LIVING, False),  # (DOWN, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((1, 1),
                     (6, 6))), REWARD_OF_LIVING, False),  # (DOWN, LEFT)
                ((0.01, False), env.locations_to_state(
                    ((0, 1), (6, 7))), REWARD_OF_LIVING, False),  # (UP, RIGHT)
                ((0.01, False), env.locations_to_state(
                    ((0, 1), (6, 6))), REWARD_OF_LIVING, False)  # (UP, LEFT)
            })
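
As a sanity check, the nine branch probabilities in each assertion above (one 0.64, four 0.08, four 0.01) arise from the two agents slipping independently and sum to 1. A hedged arithmetic sketch, assuming each agent moves as intended with probability 0.8 and slips to either adjacent direction with probability 0.1:

# Hedged sanity check for the branch probabilities above, assuming each agent
# moves as intended with probability 0.8 and slips to either side with 0.1.
p_ok, p_slip = 0.8, 0.1

branch_probs = (
    [p_ok * p_ok]            # both agents move as intended
    + [p_ok * p_slip] * 4    # exactly one of the two agents slips, either side
    + [p_slip * p_slip] * 4  # both agents slip
)
assert [round(p, 2) for p in branch_probs] == [0.64] + [0.08] * 4 + [0.01] * 4
assert abs(sum(branch_probs) - 1.0) < 1e-9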