Code example #1
    def test_detect_conflict_detects_switching(self):
        """
        * Create an env whose independent optimal policies cause a SWITCHING conflict
        * Solve independently
        * Make sure the conflict is detected
        """
        env = create_mapf_env('room-32-32-4', 9, 2, 0, 0, -1000, 0, -1)

        low_level_plan_func = partial(
            fixed_iterations_count_rtdp,
            partial(local_views_prioritized_value_iteration_min_heuristic,
                    1.0), 1.0, 100)

        policy = solve_independently_and_cross(env, [[0], [1]],
                                               low_level_plan_func, {})
        conflict = detect_conflict(env, policy)
        # Assert a conflict detected
        self.assertIsNotNone(conflict)

        aux_local_env = get_local_view(env, [0])
        agent_1_state = aux_local_env.locations_to_state(((21, 20), ))
        agent_0_state = aux_local_env.locations_to_state(((21, 19), ))

        possible_conflicts = [((1, agent_1_state, agent_0_state),
                               (0, agent_0_state, agent_1_state)),
                              ((0, agent_0_state, agent_1_state),
                               (1, agent_1_state, agent_0_state))]

        # Assert the conflict parameters are right
        self.assertIn(conflict, possible_conflicts)
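Based on the possible_conflicts tuples asserted above, detect_conflict appears to return a pair of (agent_index, agent_state, other_agent_state) triples. A minimal sketch of unpacking such a result (variable names are illustrative; env and policy are the objects built in the test above):

    conflict = detect_conflict(env, policy)
    if conflict is not None:
        # Each side describes one agent: its index, its own local state and
        # the other agent's local state involved in the conflict.
        (agent_a, state_a, other_a), (agent_b, state_b, other_b) = conflict
        print(f'agents {agent_a} and {agent_b} conflict at states {state_a} and {state_b}')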
Code example #2
    def test_policy_crossing_for_non_continuous_agent_range(self):
        """
        * Solve independently for agent groups [[1], [0,2]]
        * Cross the policies
        * Make sure the crossed policy behaves correctly
        """
        env = create_mapf_env('room-32-32-4', 15, 3, 0, 0, -1000, 0, -1)
        interesting_locations = ((19, 22), (18, 24), (17, 22))

        plan_func = partial(
            fixed_iterations_count_rtdp,
            partial(local_views_prioritized_value_iteration_min_heuristic,
                    1.0), 1.0, 100)
        crossed_policy = solve_independently_and_cross(env, [[1], [0, 2]],
                                                       plan_func, {})

        policy0 = plan_func(get_local_view(env, [1]), {})
        policy1 = plan_func(get_local_view(env, [0, 2]), {})

        action0 = policy0.act(
            policy0.env.locations_to_state((interesting_locations[1], )))
        action1 = policy1.act(
            policy1.env.locations_to_state((interesting_locations[0], ) +
                                           (interesting_locations[2], )))

        vector_action0 = integer_action_to_vector(action0, 1)
        vector_action1 = integer_action_to_vector(action1, 2)
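        # Reassemble the per-group actions into global agent order (0, 1, 2):
        # group [0, 2] contributed vector_action1, group [1] contributed vector_action0.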
        vector_action_local = (vector_action1[0], vector_action0[0],
                               vector_action1[1])

        joint_action = crossed_policy.act(
            env.locations_to_state(interesting_locations))
        vector_action_joint = integer_action_to_vector(joint_action, 3)

        self.assertEqual(vector_action_local, vector_action_joint)
Code example #3
File: main.py Project: LevyvoNet/research
def solve_single_instance(log_func, insert_to_db_func,
                          instance: InstanceMetaData):
    instance_data = {
        'type': 'instance_data',
        'map': instance.map,
        'scen_id': instance.scen_id,
        'fail_prob': instance.fail_prob,
        'n_agents': instance.n_agents,
        'solver': instance.solver
    }
    configuration_string = '_'.join(
        [f'{key}:{value}' for key, value in instance_data.items()])
    log_func(DEBUG, f'starting solving instance {configuration_string}')

    # Create the MAPF env. Some of the MovingAI benchmarks might be buggy, so be careful.
    try:
        env = create_mapf_env(instance.map, instance.scen_id,
                              instance.n_agents, instance.fail_prob / 2,
                              instance.fail_prob / 2, -1000, -1, -1)
    except Exception as ex:
        log_func(ERROR, f'{configuration_string} is invalid')
        instance_data.update({
            'solver_data': {},
            'end_reason': 'invalid',
            'error': ''.join(
                traceback.TracebackException.from_exception(ex).format()),
        })
        insert_to_db_func(instance_data)
        return

    # Run the solver
    instance_data.update({'solver_data': {}})
    with stopit.SignalTimeout(SINGLE_SCENARIO_TIMEOUT,
                              swallow_exc=False) as timeout_ctx:
        try:
            start = time.time()
            policy = instance.plan_func(env, instance_data['solver_data'])
            # policy might be None if the problem is too big for the solver
            if policy is not None:
                reward, clashed, all_rewards = evaluate_policy(
                    policy, 100, MAX_STEPS)
                instance_data['average_reward'] = reward
                instance_data['reward_std'] = np.std(all_rewards)
                instance_data['clashed'] = clashed
        except stopit.utils.TimeoutException:
            instance_data['end_reason'] = 'timeout'
            log_func(DEBUG, f'{configuration_string} got timeout')

        end = time.time()
        instance_data['total_time'] = round(end - start, 2)

    if 'end_reason' not in instance_data:
        instance_data['end_reason'] = 'done'

    log_func(DEBUG, f'inserting {configuration_string} to DB')
    # Insert stats about this instance to the DB
    insert_to_db_func(instance_data)
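A hypothetical usage sketch; the InstanceMetaData construction, the logging callback, and the results_collection DB handle below are assumptions based on the attributes this function accesses:

    instance = InstanceMetaData(map='room-32-32-4',
                                scen_id=1,
                                fail_prob=0.2,
                                n_agents=2,
                                solver='long_ma_rtdp_min_pvi',
                                plan_func=long_ma_rtdp_min_pvi_describer.func)

    # log_func receives a level (DEBUG/ERROR) and a message; insert_to_db_func
    # receives the per-instance stats dict (e.g. a MongoDB collection's insert_one).
    solve_single_instance(lambda level, msg: print(level, msg),
                          results_collection.insert_one,
                          instance)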
Code example #4
File: test_heuristics.py Project: LevyvoNet/research
    def test_dijkstra_room_env(self):
        """Test dijkstra algorithm on a large, complex environment."""
        env = create_mapf_env('room-32-32-4', 1, 1, 0, 0, -1000, -1, -1)

        dijkstra_func = dijkstra_min_heuristic(env)
        vi_policy = value_iteration(1.0, env, {})

        for i in range(env.nS):
            self.assertEqual(dijkstra_func(i), vi_policy.v[i])
Code example #5
    def test_multiple_agents_env(self):
        """Assert that when trying to solver a large environment we are not exceeding the RAM limit."""
        # Note the large number of agents
        env = create_mapf_env('room-32-32-4', 12, 6, 0.1, 0.1, -1000, 0, -1)

        info = {}
        plan_func = self.get_plan_func()
        policy = plan_func(env, info)

        self.assertIs(policy, None)
        self.assertEqual(info['end_reason'], 'out_of_memory')
Code example #6
    def test_conflict_detected_for_room_scenario_with_crossed_policy(self):
        env = create_mapf_env('room-32-32-4', 1, 2, 0.1, 0.1, -1000, 0, -1)

        policy1 = fixed_iterations_count_rtdp(
            partial(local_views_prioritized_value_iteration_min_heuristic,
                    1.0), 1.0, 100, get_local_view(env, [0]), {})
        policy2 = fixed_iterations_count_rtdp(
            partial(local_views_prioritized_value_iteration_min_heuristic,
                    1.0), 1.0, 100, get_local_view(env, [1]), {})
        crossed_policy = CrossedPolicy(env, [policy1, policy2], [[0], [1]])

        self.assertIsNot(detect_conflict(env, crossed_policy), None)
Code example #7
File: utils_tests.py Project: LevyvoNet/gym-mapf
    def test_create_mapf_env(self):
        empty_8_8_1 = create_mapf_env(map_name='empty-8-8',
                                      scen_id=1,
                                      n_agents=2,
                                      fail_prob=0.2,
                                      reward_of_clash=-1000.0,
                                      reward_of_goal=100.0,
                                      reward_of_living=0.0,
                                      optimization_criteria=OptimizationCriteria.Makespan)

        self.assertEqual(empty_8_8_1.s, empty_8_8_1.locations_to_state(((0, 0), (5, 3))))

        empty_48_48_16 = create_mapf_env(map_name='empty-48-48',
                                         scen_id=16,
                                         n_agents=2,
                                         fail_prob=0.2,
                                         reward_of_clash=-1000.0,
                                         reward_of_goal=100.0,
                                         reward_of_living=0.0,
                                         optimization_criteria=OptimizationCriteria.Makespan)

        self.assertEqual(empty_48_48_16.s, empty_48_48_16.locations_to_state(((40, 42), (17, 2))))
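Note that this gym-mapf test calls create_mapf_env with keyword arguments, a single fail_prob and an optimization_criteria, whereas the LevyvoNet/research examples on this page pass eight positional arguments, including two separate failure probabilities. A side-by-side sketch of the two call styles as they appear in these examples (the positional parameter names in the comment are assumptions):

    # Positional style used in the LevyvoNet/research examples; the assumed
    # order is (map, scen_id, n_agents, fail_prob_1, fail_prob_2,
    # reward_of_clash, reward_of_goal, reward_of_living).
    env_a = create_mapf_env('room-32-32-4', 9, 2, 0, 0, -1000, 0, -1)

    # Keyword style used in the LevyvoNet/gym-mapf test above.
    env_b = create_mapf_env(map_name='empty-8-8',
                            scen_id=1,
                            n_agents=2,
                            fail_prob=0.2,
                            reward_of_clash=-1000.0,
                            reward_of_goal=100.0,
                            reward_of_living=0.0,
                            optimization_criteria=OptimizationCriteria.Makespan)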
Code example #8
File: test_heuristics.py Project: LevyvoNet/research
    def test_dijkstra_sum_sanity_room_env_large_goal_reward(self):
        env = create_mapf_env('sanity-2-8', None, 2, 0, 0, -1000, 100, -1)
        env0 = get_local_view(env, [0])
        env1 = get_local_view(env, [1])

        dijkstra_func = dijkstra_sum_heuristic(env)
        vi_policy0 = prioritized_value_iteration(1.0, env0, {})
        vi_policy1 = prioritized_value_iteration(1.0, env1, {})

        for s in range(env.nS):
            s0 = env0.locations_to_state((env.state_to_locations(s)[0], ))
            s1 = env1.locations_to_state((env.state_to_locations(s)[1], ))

            self.assertEqual(dijkstra_func(s),
                             vi_policy0.v[s0] + vi_policy1.v[s1])
Code example #9
File: test_id.py Project: LevyvoNet/research
    def test_env_with_switch_conflict_solved_properly(self):
        env = create_mapf_env('room-32-32-4', 9, 2, 0, 0, -1000, 0, -1)
        gamma = 1.0
        n_iterations = 100

        rtdp_plan_func = partial(fixed_iterations_count_rtdp,
                                 partial(local_views_prioritized_value_iteration_min_heuristic, gamma),
                                 gamma,
                                 n_iterations)
        rtdp_merge_func = functools.partial(fixed_iterations_rtdp_merge, solution_heuristic_min, gamma, n_iterations)
        policy = id(rtdp_plan_func, rtdp_merge_func, env, {})

        reward, clashed, _ = evaluate_policy(policy, 1, 1000)

        self.assertFalse(clashed)

        # Assert that the solution is reasonable (actually solving)
        self.assertGreater(reward, -1000)
Code example #10
File: main.py Project: LevyvoNet/research
def restore_weird_stuff():
    """Restore weird performance of ID-MA-RTDP on sanity envs from the heuristics experiment"""
    print('start restoring')
    env = create_mapf_env('sanity-2-32', 1, 3, 0.1, 0.1, -1000, -1, -1)
    solver = long_id_ma_rtdp_sum_pvi_describer.func

    with stopit.SignalTimeout(SINGLE_SCENARIO_TIMEOUT,
                              swallow_exc=False) as timeout_ctx:
        try:
            info = {}
            policy = solver(env, info)
        except stopit.utils.TimeoutException:
            print('got timeout!!!')

    import ipdb
    ipdb.set_trace()

    print('OMG')
Code example #11
File: main.py Project: LevyvoNet/research
def insert_scenario_metadata(log_func, insert_to_db_func,
                             scenario_metadata: ScenarioMetadata):
    scen_data = {
        'type': 'scenario_data',
        'map': scenario_metadata.map,
        'scen_id': scenario_metadata.scen_id,
        'fail_prob': scenario_metadata.fail_prob,
        'n_agents': scenario_metadata.n_agents,
    }

    configuration_string = '_'.join(
        [f'{key}:{value}' for key, value in scen_data.items()])
    scen_data['valid'] = True
    log_func(DEBUG, f'starting scenario data for {configuration_string}')

    log_func(
        DEBUG,
        f'starting solving independent agents for {configuration_string}')
    try:
        env = create_mapf_env(scenario_metadata.map, scenario_metadata.scen_id,
                              scenario_metadata.n_agents,
                              scenario_metadata.fail_prob / 2,
                              scenario_metadata.fail_prob / 2, -1000, -1, -1)
    except KeyError:
        log_func(ERROR, f'{configuration_string} is invalid')
        scen_data['valid'] = False
        insert_to_db_func(scen_data)
        return

    # Calculate single agent rewards
    scen_data['self_agent_reward'] = []
    for i in range(env.n_agents):
        pvi_plan_func = partial(prioritized_value_iteration, 1.0)
        local_env = get_local_view(env, [i])
        policy = pvi_plan_func(local_env, {})
        local_env.reset()
        self_agent_reward = float(policy.v[local_env.s])
        scen_data['self_agent_reward'].append(self_agent_reward)

    log_func(DEBUG,
             f'inserting scenario data for {configuration_string} to DB')
    # Insert stats about this instance to the DB
    insert_to_db_func(scen_data)
Code example #12
                       '..@...']), 2, ((2, 0), (2, 5)), ((2, 5), (2, 0)), 0.1,
             0.1, -0.001, 0, -1), 'symmetrical bottle-neck stochastic'),
    (MapfEnv(MapfGrid(['..@..', '..@..', '.....', '..@..'
                       '..@..']), 2, ((2, 0), (2, 4)), ((2, 4), (2, 0)), 0, 0,
             -0.001, 0, -1), 'Asymmetrical bottle-neck deterministic'),
    (MapfEnv(MapfGrid(['..@..', '..@..', '.....', '..@..'
                       '..@..']), 2, ((2, 0), (2, 4)), ((2, 4), (2, 0)), 0, 0,
             -0.001, 100,
             -1), 'Asymmetrical bottle-neck deterministic large goal reward'),
    (MapfEnv(MapfGrid(['..@..', '..@..', '.....', '..@..'
                       '..@..']), 2, ((2, 0), (2, 4)), ((2, 4), (2, 0)), 0, 0,
             -0.001, 100, -1), 'Asymmetrical bottle-neck stochastic'),
]

mid_envs = [
    (create_mapf_env('room-32-32-4', 12, 2, 0, 0, -1000, 0,
                     -1), 'room-32-32-4 scen 12 - 2 agents deterministic'),
    (create_mapf_env('room-32-32-4', 1, 2, 0, 0, -1000, 0,
                     -1), 'room-32-32-4 scen 1 - 2 agents deterministic'),
    (MapfEnv(MapfGrid(['...', '@.@', '@.@', '...']), 2, ((0, 0), (0, 2)),
             ((3, 0), (3, 2)), 0.0, 0.0, -1000, 0, -1), 'hand crafted env'),
    (create_mapf_env('room-32-32-4', 12, 2, 0.1, 0.1, -1000, 0,
                     -1), 'room-32-32-4 scen 12 - stochastic'),
    (create_mapf_env('sanity-3-8', None, 3, 0.1, 0.1, -1000, 0,
                     -1), 'sanity 3 agents stochastic'),
]

difficult_envs = [
    (create_mapf_env('room-32-32-4', 13, 2, 0, 0, -1000, 0,
                     -1), 'room-32-32-4 scen 13 - 2 agents 1 conflict'),
    (create_mapf_env('sanity-2-32', 1, 3, 0.1, 0.1, -1000, 0,
                     -1), 'conflict between pair and single large map')
Code example #13
 #     'env': create_mapf_env('empty-16-16', 1, 4, 0.1, 0.1, -1000, 0, -1),
 #     'env_str': "map:empty-16-16;scen:1;n_agents:4;fail_prob:0.1;2 conflicts",
 #     'solver_describers': [long_ma_rtdp_min_pvi_describer]
 # },
 # {
 #     'env': create_mapf_env('empty-16-16', 1, 5, 0.1, 0.1, -1000, 0, -1),
 #     'env_str': "map:empty-16-16;scen:1;n_agents:5;fail_prob:0.1;2 conflicts",
 #     'solver_describers': [long_ma_rtdp_min_pvi_describer]
 # },
 # {
 #     'env': create_mapf_env('empty-16-16', 1, 6, 0.1, 0.1, -1000, 0, -1),
 #     'env_str': "map:empty-16-16;scen:1;n_agents:6;fail_prob:0.1;2 conflicts",
 #     'solver_describers': [long_ma_rtdp_min_pvi_describer]
 # },
 {
     'env': create_mapf_env('sanity-1-8', None, 1, 0.1, 0.1, -1000, 0, -1),
     'env_str': "map:sanity-1-8;n_agents:1X1;fail_prob:0.1;0 conflicts",
     'solver_describers': EXPERIMENT_SOLVERS
 },
 {
     'env': create_mapf_env('sanity-2-8', None, 2, 0.1, 0.1, -1000, 0, -1),
     'env_str': "map:sanity-2-8;n_agents:2X1;fail_prob:0.1;0 conflicts",
     'solver_describers': EXPERIMENT_SOLVERS
 },
 {
     'env': create_mapf_env('sanity-3-8', None, 3, 0.1, 0.1, -1000, 0, -1),
     'env_str': "map:sanity-3-8;n_agents:3X1;fail_prob:0.1;0 conflicts",
     'solver_describers': EXPERIMENT_SOLVERS
 },
 # {
 #     'env': create_mapf_env('sanity-4-8', None, 4, 0.1, 0.1, -1000, 0, -1),