def test_detect_conflict_detects_switching(self):
    """
    * Create an env whose independent optimal policies cause a SWITCHING conflict
    * Solve independently
    * Make sure the conflict is detected
    """
    env = create_mapf_env('room-32-32-4', 9, 2, 0, 0, -1000, 0, -1)

    low_level_plan_func = partial(
        fixed_iterations_count_rtdp,
        partial(local_views_prioritized_value_iteration_min_heuristic, 1.0),
        1.0, 100)
    policy = solve_independently_and_cross(env, [[0], [1]], low_level_plan_func, {})

    conflict = detect_conflict(env, policy)

    # Assert a conflict was detected
    self.assertIsNotNone(conflict)

    aux_local_env = get_local_view(env, [0])
    agent_1_state = aux_local_env.locations_to_state(((21, 20),))
    agent_0_state = aux_local_env.locations_to_state(((21, 19),))

    possible_conflicts = [
        ((1, agent_1_state, agent_0_state), (0, agent_0_state, agent_1_state)),
        ((0, agent_0_state, agent_1_state), (1, agent_1_state, agent_0_state)),
    ]

    # Assert the conflict parameters are right
    self.assertIn(conflict, possible_conflicts)
def test_policy_crossing_for_non_continuous_agent_range(self):
    """
    * Solve independently for agent groups [[1], [0, 2]]
    * Cross the policies
    * Make sure the crossed policy behaves right
    """
    env = create_mapf_env('room-32-32-4', 15, 3, 0, 0, -1000, 0, -1)
    interesting_locations = ((19, 22), (18, 24), (17, 22))

    plan_func = partial(
        fixed_iterations_count_rtdp,
        partial(local_views_prioritized_value_iteration_min_heuristic, 1.0),
        1.0, 100)
    crossed_policy = solve_independently_and_cross(env, [[1], [0, 2]], plan_func, {})

    policy0 = plan_func(get_local_view(env, [1]), {})
    policy1 = plan_func(get_local_view(env, [0, 2]), {})

    action0 = policy0.act(
        policy0.env.locations_to_state((interesting_locations[1],)))
    action1 = policy1.act(
        policy1.env.locations_to_state((interesting_locations[0],) + (interesting_locations[2],)))

    vector_action0 = integer_action_to_vector(action0, 1)
    vector_action1 = integer_action_to_vector(action1, 2)
    vector_action_local = (vector_action1[0], vector_action0[0], vector_action1[1])

    joint_action = crossed_policy.act(env.locations_to_state(interesting_locations))
    vector_action_joint = integer_action_to_vector(joint_action, 3)

    self.assertEqual(vector_action_local, vector_action_joint)
def solve_single_instance(log_func, insert_to_db_func, instance: InstanceMetaData):
    instance_data = {
        'type': 'instance_data',
        'map': instance.map,
        'scen_id': instance.scen_id,
        'fail_prob': instance.fail_prob,
        'n_agents': instance.n_agents,
        'solver': instance.solver
    }
    configuration_string = '_'.join(
        [f'{key}:{value}' for key, value in instance_data.items()])
    log_func(DEBUG, f'starting solving instance {configuration_string}')

    # Create mapf env; some of the benchmarks from movingAI might have bugs, so be careful
    try:
        env = create_mapf_env(instance.map, instance.scen_id, instance.n_agents,
                              instance.fail_prob / 2, instance.fail_prob / 2,
                              -1000, -1, -1)
    except Exception as ex:
        log_func(ERROR, f'{configuration_string} is invalid')
        instance_data.update({
            'solver_data': {},
            'end_reason': 'invalid',
            'error': ''.join(traceback.TracebackException.from_exception(ex).format())
        })
        insert_to_db_func(instance_data)
        return

    # Run the solver
    instance_data.update({'solver_data': {}})
    with stopit.SignalTimeout(SINGLE_SCENARIO_TIMEOUT, swallow_exc=False) as timeout_ctx:
        try:
            start = time.time()
            policy = instance.plan_func(env, instance_data['solver_data'])
            if policy is not None:
                # policy might be None if the problem is too big for the solver
                reward, clashed, all_rewards = evaluate_policy(policy, 100, MAX_STEPS)
                instance_data['average_reward'] = reward
                instance_data['reward_std'] = np.std(all_rewards)
                instance_data['clashed'] = clashed
        except stopit.utils.TimeoutException:
            instance_data['end_reason'] = 'timeout'
            log_func(DEBUG, f'{configuration_string} got timeout')

    end = time.time()
    instance_data['total_time'] = round(end - start, 2)

    if 'end_reason' not in instance_data:
        instance_data['end_reason'] = 'done'

    log_func(DEBUG, f'inserting {configuration_string} to DB')

    # Insert stats about this instance to the DB
    insert_to_db_func(instance_data)
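# Minimal usage sketch (illustrative only): it demonstrates the callback signatures expected
# by solve_single_instance - log_func(level, message) and insert_to_db_func(record_dict).
# The concrete InstanceMetaData field values, the 'empty-8-8' map, and the keyword-argument
# construction below are assumptions for demonstration, not values taken from the real
# experiment configuration.
def _example_solve_single_instance():
    results = []  # collect records in memory instead of a real DB
    example_instance = InstanceMetaData(
        map='empty-8-8',           # hypothetical benchmark map
        scen_id=1,
        fail_prob=0.2,
        n_agents=2,
        solver='example-solver',   # hypothetical solver name
        plan_func=partial(
            fixed_iterations_count_rtdp,
            partial(local_views_prioritized_value_iteration_min_heuristic, 1.0),
            1.0, 100))
    solve_single_instance(lambda level, msg: print(level, msg),  # simple stdout logger
                          results.append,
                          example_instance)
    return results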
def test_dijkstra_room_env(self):
    """Test the Dijkstra heuristic against value iteration on a large, complex environment."""
    env = create_mapf_env('room-32-32-4', 1, 1, 0, 0, -1000, -1, -1)

    dijkstra_func = dijkstra_min_heuristic(env)
    vi_policy = value_iteration(1.0, env, {})

    for i in range(env.nS):
        self.assertEqual(dijkstra_func(i), vi_policy.v[i])
def test_multiple_agents_env(self):
    """Assert that when trying to solve a too-large environment the solver stops gracefully
    instead of exceeding the RAM limit."""
    # Note the large number of agents
    env = create_mapf_env('room-32-32-4', 12, 6, 0.1, 0.1, -1000, 0, -1)

    info = {}
    plan_func = self.get_plan_func()
    policy = plan_func(env, info)

    self.assertIs(policy, None)
    self.assertEqual(info['end_reason'], 'out_of_memory')
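# Illustrative sketch only (not the repo's actual implementation): one way a plan_func could
# satisfy the contract asserted above - return None and set info['end_reason'] = 'out_of_memory'
# rather than crashing when the environment is too big. The MemoryError-based guard is an
# assumption made for demonstration purposes.
def _memory_guarded_plan_func_example(inner_plan_func):
    def plan(env, info):
        try:
            return inner_plan_func(env, info)
        except MemoryError:
            info['end_reason'] = 'out_of_memory'
            return None
    return plan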
def test_conflict_detected_for_room_scenario_with_crossed_policy(self):
    env = create_mapf_env('room-32-32-4', 1, 2, 0.1, 0.1, -1000, 0, -1)

    policy1 = fixed_iterations_count_rtdp(
        partial(local_views_prioritized_value_iteration_min_heuristic, 1.0),
        1.0, 100, get_local_view(env, [0]), {})
    policy2 = fixed_iterations_count_rtdp(
        partial(local_views_prioritized_value_iteration_min_heuristic, 1.0),
        1.0, 100, get_local_view(env, [1]), {})

    crossed_policy = CrossedPolicy(env, [policy1, policy2], [[0], [1]])

    self.assertIsNotNone(detect_conflict(env, crossed_policy))
def test_create_mapf_env(self):
    empty_8_8_1 = create_mapf_env(map_name='empty-8-8',
                                  scen_id=1,
                                  n_agents=2,
                                  fail_prob=0.2,
                                  reward_of_clash=-1000.0,
                                  reward_of_goal=100.0,
                                  reward_of_living=0.0,
                                  optimization_criteria=OptimizationCriteria.Makespan)

    self.assertEqual(empty_8_8_1.s,
                     empty_8_8_1.locations_to_state(((0, 0), (5, 3))))

    empty_48_48_16 = create_mapf_env(map_name='empty-48-48',
                                     scen_id=16,
                                     n_agents=2,
                                     fail_prob=0.2,
                                     reward_of_clash=-1000.0,
                                     reward_of_goal=100.0,
                                     reward_of_living=0.0,
                                     optimization_criteria=OptimizationCriteria.Makespan)

    self.assertEqual(empty_48_48_16.s,
                     empty_48_48_16.locations_to_state(((40, 42), (17, 2))))
def test_dijkstra_sum_sanity_room_env_large_goal_reward(self):
    env = create_mapf_env('sanity-2-8', None, 2, 0, 0, -1000, 100, -1)
    env0 = get_local_view(env, [0])
    env1 = get_local_view(env, [1])

    dijkstra_func = dijkstra_sum_heuristic(env)
    vi_policy0 = prioritized_value_iteration(1.0, env0, {})
    vi_policy1 = prioritized_value_iteration(1.0, env1, {})

    for s in range(env.nS):
        # Convert each agent's location through its own local view
        s0 = env0.locations_to_state((env.state_to_locations(s)[0],))
        s1 = env1.locations_to_state((env.state_to_locations(s)[1],))
        self.assertEqual(dijkstra_func(s), vi_policy0.v[s0] + vi_policy1.v[s1])
def test_env_with_switch_conflict_solved_properly(self):
    env = create_mapf_env('room-32-32-4', 9, 2, 0, 0, -1000, 0, -1)

    gamma = 1.0
    n_iterations = 100
    rtdp_plan_func = partial(
        fixed_iterations_count_rtdp,
        partial(local_views_prioritized_value_iteration_min_heuristic, gamma),
        gamma, n_iterations)
    rtdp_merge_func = functools.partial(fixed_iterations_rtdp_merge,
                                        solution_heuristic_min,
                                        gamma, n_iterations)
    policy = id(rtdp_plan_func, rtdp_merge_func, env, {})

    reward, clashed, _ = evaluate_policy(policy, 1, 1000)

    self.assertFalse(clashed)

    # Assert that the solution is reasonable (actually solving)
    self.assertGreater(reward, -1000)
def restore_weird_stuff():
    """Reproduce the weird performance of ID-MA-RTDP on sanity envs from the heuristics experiment"""
    print('start restoring')
    env = create_mapf_env('sanity-2-32', 1, 3, 0.1, 0.1, -1000, -1, -1)
    solver = long_id_ma_rtdp_sum_pvi_describer.func

    with stopit.SignalTimeout(SINGLE_SCENARIO_TIMEOUT, swallow_exc=False) as timeout_ctx:
        try:
            info = {}
            policy = solver(env, info)
        except stopit.utils.TimeoutException:
            print('got timeout!!!')

    import ipdb
    ipdb.set_trace()
    print('OMG')
def insert_scenario_metadata(log_func, insert_to_db_func, scenario_metadata: ScenarioMetadata):
    scen_data = {
        'type': 'scenario_data',
        'map': scenario_metadata.map,
        'scen_id': scenario_metadata.scen_id,
        'fail_prob': scenario_metadata.fail_prob,
        'n_agents': scenario_metadata.n_agents,
    }
    configuration_string = '_'.join(
        [f'{key}:{value}' for key, value in scen_data.items()])
    scen_data['valid'] = True

    log_func(DEBUG, f'starting scenario data for {configuration_string}')
    log_func(DEBUG, f'starting solving independent agents for {configuration_string}')

    try:
        env = create_mapf_env(scenario_metadata.map, scenario_metadata.scen_id,
                              scenario_metadata.n_agents,
                              scenario_metadata.fail_prob / 2,
                              scenario_metadata.fail_prob / 2,
                              -1000, -1, -1)
    except KeyError:
        log_func(ERROR, f'{configuration_string} is invalid')
        scen_data['valid'] = False
        insert_to_db_func(scen_data)
        return

    # Calculate single agent rewards
    scen_data['self_agent_reward'] = []
    for i in range(env.n_agents):
        pvi_plan_func = partial(prioritized_value_iteration, 1.0)
        local_env = get_local_view(env, [i])
        policy = pvi_plan_func(local_env, {})
        local_env.reset()
        self_agent_reward = float(policy.v[local_env.s])
        scen_data['self_agent_reward'].append(self_agent_reward)

    log_func(DEBUG, f'inserting scenario data for {configuration_string} to DB')

    # Insert stats about this scenario to the DB
    insert_to_db_func(scen_data)
                       '..@...']),
             2, ((2, 0), (2, 5)), ((2, 5), (2, 0)), 0.1, 0.1, -0.001, 0, -1),
     'symmetrical bottle-neck stochastic'),
    (MapfEnv(MapfGrid(['..@..',
                       '..@..',
                       '.....',
                       '..@..',
                       '..@..']),
             2, ((2, 0), (2, 4)), ((2, 4), (2, 0)), 0, 0, -0.001, 0, -1),
     'Asymmetrical bottle-neck deterministic'),
    (MapfEnv(MapfGrid(['..@..',
                       '..@..',
                       '.....',
                       '..@..',
                       '..@..']),
             2, ((2, 0), (2, 4)), ((2, 4), (2, 0)), 0, 0, -0.001, 100, -1),
     'Asymmetrical bottle-neck deterministic large goal reward'),
    (MapfEnv(MapfGrid(['..@..',
                       '..@..',
                       '.....',
                       '..@..',
                       '..@..']),
             2, ((2, 0), (2, 4)), ((2, 4), (2, 0)), 0, 0, -0.001, 100, -1),
     'Asymmetrical bottle-neck stochastic'),
]

mid_envs = [
    (create_mapf_env('room-32-32-4', 12, 2, 0, 0, -1000, 0, -1),
     'room-32-32-4 scen 12 - 2 agents deterministic'),
    (create_mapf_env('room-32-32-4', 1, 2, 0, 0, -1000, 0, -1),
     'room-32-32-4 scen 1 - 2 agents deterministic'),
    (MapfEnv(MapfGrid(['...',
                       '@.@',
                       '@.@',
                       '...']),
             2, ((0, 0), (0, 2)), ((3, 0), (3, 2)), 0.0, 0.0, -1000, 0, -1),
     'hand crafted env'),
    (create_mapf_env('room-32-32-4', 12, 2, 0.1, 0.1, -1000, 0, -1),
     'room-32-32-4 scen 12 - stochastic'),
    (create_mapf_env('sanity-3-8', None, 3, 0.1, 0.1, -1000, 0, -1),
     'sanity 3 agents stochastic'),
]

difficult_envs = [
    (create_mapf_env('room-32-32-4', 13, 2, 0, 0, -1000, 0, -1),
     'room-32-32-4 scen 13 - 2 agents 1 conflict'),
    (create_mapf_env('sanity-2-32', 1, 3, 0.1, 0.1, -1000, 0, -1),
     'conflict between pair and single large map')
    #     'env': create_mapf_env('empty-16-16', 1, 4, 0.1, 0.1, -1000, 0, -1),
    #     'env_str': "map:empty-16-16;scen:1;n_agents:4;fail_prob:0.1;2 conflicts",
    #     'solver_describers': [long_ma_rtdp_min_pvi_describer]
    # },
    # {
    #     'env': create_mapf_env('empty-16-16', 1, 5, 0.1, 0.1, -1000, 0, -1),
    #     'env_str': "map:empty-16-16;scen:1;n_agents:5;fail_prob:0.1;2 conflicts",
    #     'solver_describers': [long_ma_rtdp_min_pvi_describer]
    # },
    # {
    #     'env': create_mapf_env('empty-16-16', 1, 6, 0.1, 0.1, -1000, 0, -1),
    #     'env_str': "map:empty-16-16;scen:1;n_agents:6;fail_prob:0.1;2 conflicts",
    #     'solver_describers': [long_ma_rtdp_min_pvi_describer]
    # },
    {
        'env': create_mapf_env('sanity-1-8', None, 1, 0.1, 0.1, -1000, 0, -1),
        'env_str': "map:sanity-1-8;n_agents:1X1;fail_prob:0.1;0 conflicts",
        'solver_describers': EXPERIMENT_SOLVERS
    },
    {
        'env': create_mapf_env('sanity-2-8', None, 2, 0.1, 0.1, -1000, 0, -1),
        'env_str': "map:sanity-2-8;n_agents:2X1;fail_prob:0.1;0 conflicts",
        'solver_describers': EXPERIMENT_SOLVERS
    },
    {
        'env': create_mapf_env('sanity-3-8', None, 3, 0.1, 0.1, -1000, 0, -1),
        'env_str': "map:sanity-3-8;n_agents:3X1;fail_prob:0.1;0 conflicts",
        'solver_describers': EXPERIMENT_SOLVERS
    },
    # {
    #     'env': create_mapf_env('sanity-4-8', None, 4, 0.1, 0.1, -1000, 0, -1),