def tests_random_interference_from_outside():
    """Check that global RNG activity outside the env does not change an episode.

    The same single-agent episode (fixed env seeds, constant action
    ``RailEnvActions(2)``) is played twice.  The first run records the reward
    and position of agent 0 after every step.  The second run re-seeds the
    global ``random`` and ``np.random`` generators and draws from both of them
    before every env step; it must reproduce the recorded rewards and
    positions exactly.
    """
    n_steps = 200

    def _run_episode(disturb_global_rng):
        """Play one episode and return the per-step (reward, position) trace of agent 0."""
        rail, _ = make_simple_rail2()
        if disturb_global_rng:
            # Put the global generators into a known state before the env is built.
            random.seed(47)
            np.random.seed(1234)
        env = RailEnv(width=25,
                      height=30,
                      rail_generator=rail_from_grid_transition_map(rail),
                      schedule_generator=random_schedule_generator(seed=2),
                      number_of_agents=1,
                      random_seed=1)
        env.reset()
        env.agents[0].speed_data['speed'] = 0.33
        env.reset(False, False, False, random_seed=10)

        dummy_list = [1, 2, 6, 7, 8, 9, 4, 5, 4]
        trace = []
        for _ in range(n_steps):
            action_dict: Dict[int, RailEnvActions] = {}
            for agent in env.agents:
                # Every agent always gets the same fixed action.
                action_dict[agent.handle] = RailEnvActions(2)
                if disturb_global_rng:
                    # Dummy draws from the global generators; the env must not notice them.
                    random.shuffle(dummy_list)
                    np.random.rand()
            _, reward, _, _ = env.step(action_dict)
            trace.append((reward[0], env.agents[0].position))
        return trace

    baseline = _run_episode(disturb_global_rng=False)
    disturbed = _run_episode(disturb_global_rng=True)

    # Compare the two runs step by step so a failure names the first diverging step.
    for step, (expected, actual) in enumerate(zip(baseline, disturbed)):
        assert actual[0] == expected[0], "reward diverged at step {}".format(step)
        assert actual[1] == expected[1], "position diverged at step {}".format(step)
def test_malfunction_values_and_behavior():
    """
    Check that the malfunction counter of the single agent counts down step by step.

    Returns
    -------

    """
    rail, rail_map = make_simple_rail2()

    # min_duration == max_duration, so the configured malfunction duration is fixed at 10
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=0.001,
        min_duration=10,
        max_duration=10,
    )
    env = RailEnv(
        width=25,
        height=30,
        rail_generator=rail_from_grid_transition_map(rail),
        schedule_generator=random_schedule_generator(),
        number_of_agents=1,
        malfunction_generator_and_process_data=malfunction_from_params(malfunction_parameters),
        obs_builder_object=SingleAgentNavigationObs(),
    )
    env.reset(False, False, activate_agents=True, random_seed=10)

    # Expected value of malfunction_data['malfunction'] after each step.
    # Only the first 15 entries are compared below.
    expected_countdown = [9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 10, 9, 8, 7, 6, 5]

    # The env is stepped with an empty action dict throughout
    no_actions: Dict[int, RailEnvActions] = {}
    print("[")
    for time_step in range(15):
        env.step(no_actions)
        remaining = env.agents[0].malfunction_data['malfunction']
        assert remaining == expected_countdown[time_step]
def test_malfunction_before_entry():
    """Check the malfunction state of all ten agents right after reset, before any step is taken."""
    # min_duration == max_duration, so the configured malfunction duration is fixed at 10
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=2,
        min_duration=10,
        max_duration=10,
    )
    rail, rail_map = make_simple_rail2()
    env = RailEnv(
        width=25,
        height=30,
        rail_generator=rail_from_grid_transition_map(rail),
        schedule_generator=random_schedule_generator(),
        number_of_agents=10,
        malfunction_generator_and_process_data=malfunction_from_params(malfunction_parameters),
        obs_builder_object=SingleAgentNavigationObs(),
    )
    env.reset(False, False, False, random_seed=10)
    env.agents[0].target = (0, 0)

    # Expected initial malfunction counter per agent handle: a mix of agents
    # that are already malfunctioning (10) and agents that are working (0).
    expected_malfunction = [0, 10, 0, 10, 10, 10, 10, 10, 10, 10]
    for handle, expected in enumerate(expected_malfunction):
        assert env.agents[handle].malfunction_data['malfunction'] == expected
def test_malfanction_from_params():
    """
    Check that the values given to malfunction_from_params are readable,
    unchanged, from env.malfunction_process_data after a reset.

    Returns
    -------

    """
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=1000,
        min_duration=2,
        max_duration=5,
    )
    malfunction_generator = malfunction_from_params(malfunction_parameters)

    rail, rail_map = make_simple_rail2()
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=10,
                  malfunction_generator_and_process_data=malfunction_generator)
    env.reset()

    process_data = env.malfunction_process_data
    assert process_data.malfunction_rate == 1000
    assert process_data.min_duration == 2
    assert process_data.max_duration == 5
def test_single_malfunction_generator():
    """
    Check that with single_malfunction_generator exactly one malfunction is
    counted over all agents after ten steps, for ten consecutive resets.

    Returns
    -------

    """
    rail, rail_map = make_simple_rail2()
    malfunction_generator = single_malfunction_generator(earlierst_malfunction=10,
                                                         malfunction_duration=5)
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=10,
                  malfunction_generator_and_process_data=malfunction_generator)

    for test in range(10):
        env.reset()
        print(test)

        action_dict = dict()
        for _ in range(10):
            # Go forward all the time
            action_dict.update({agent.handle: RailEnvActions(2) for agent in env.agents})
            env.step(action_dict)

        # Sum the malfunction counters of all agents
        tot_malfunctions = sum(agent.malfunction_data['nr_malfunctions'] for agent in env.agents)
        assert tot_malfunctions == 1
def test_seeding_and_observations():
    """Check that two identically seeded envs stay in sync despite different observation builders.

    One env uses GlobalObsForRailEnv, the other TreeObsForRailEnv.  Both must
    produce the same initial positions and, after ten steps with identical
    actions, the same agent positions.
    """
    rail, rail_map = make_simple_rail2()

    def build_env(obs_builder):
        # Both envs share every setting except the observation builder
        return RailEnv(width=25,
                       height=30,
                       rail_generator=rail_from_grid_transition_map(rail),
                       schedule_generator=random_schedule_generator(seed=12),
                       number_of_agents=10,
                       obs_builder_object=obs_builder)

    # Global observation
    env = build_env(GlobalObsForRailEnv())
    # Tree observation
    env2 = build_env(TreeObsForRailEnv(max_depth=2,
                                       predictor=ShortestPathPredictorForRailEnv()))

    env.reset(False, False, False, random_seed=12)
    env2.reset(False, False, False, random_seed=12)

    # Both environments must produce the same initial start positions
    for handle in range(10):
        assert env.agents[handle].initial_position == env2.agents[handle].initial_position

    # Feed the same random actions to both environments
    action_dict = {}
    for step in range(10):
        for a in range(env.get_num_agents()):
            action_dict[a] = np.random.randint(4)
        env.step(action_dict)
        env2.step(action_dict)

    # Both environments must end up in the same positions
    for handle in range(10):
        assert env.agents[handle].position == env2.agents[handle].position

    # Prints one assert statement per agent (test-generation aid)
    for a in range(env.get_num_agents()):
        print("assert env.agents[{}].position == env2.agents[{}].position".format(a, a))
def test_random_seeding():
    """Check that a fixed seed yields the same initial agent positions on every rebuild.

    The env is rebuilt and reset 100 times with the same schedule seed and the
    same ``random_seed``.  After ten steps the initial positions of all ten
    agents must equal the expected table each time.
    """
    rail, rail_map = make_simple_rail2()

    # NOTE(review): these comparisons were previously written without `assert`
    # and therefore never evaluated; if the values turn out to be stale they
    # must be regenerated from the env rather than the asserts removed.
    expected_initial_positions = [
        (3, 2),
        (3, 5),
        (3, 6),
        (5, 6),
        (3, 4),
        (3, 1),
        (3, 9),
        (4, 6),
        (0, 3),
        (3, 7),
    ]

    for idx in range(100):
        env = RailEnv(width=25,
                      height=30,
                      rail_generator=rail_from_grid_transition_map(rail),
                      schedule_generator=random_schedule_generator(seed=12),
                      number_of_agents=10)
        env.reset(True, True, False, random_seed=1)

        # Move target to unreachable position in order to not interfere with test
        env.agents[0].target = (0, 0)

        # Only agent 0 receives an action (2) on each of the ten steps
        for step in range(10):
            env.step({0: 2})

        for handle, expected in enumerate(expected_initial_positions):
            actual = env.agents[handle].initial_position
            assert actual == expected, "run {}: agent {} started at {}, expected {}".format(
                idx, handle, actual, expected)
def test_malfunction_process():
    """Check that a malfunctioning agent stands still and that 23 malfunctions occur in 100 steps."""
    # min_duration == max_duration, so the configured malfunction duration is fixed at 3
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=1,
        min_duration=3,
        max_duration=3,
    )
    rail, rail_map = make_simple_rail2()
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  malfunction_generator_and_process_data=malfunction_from_params(malfunction_parameters),
                  obs_builder_object=SingleAgentNavigationObs())
    obs, info = env.reset(False, False, True, random_seed=10)

    total_down_time = 0
    agent_old_position = env.agents[0].position

    # Move target to unreachable position in order to not interfere with test
    env.agents[0].target = (0, 0)

    for step in range(100):
        # The action is the index of the largest observation entry, shifted by one
        actions = {i: np.argmax(obs[i]) + 1 for i in range(len(obs))}
        obs, all_rewards, done, _ = env.step(actions)

        remaining_malfunction = env.agents[0].malfunction_data['malfunction']
        if remaining_malfunction > 0:
            # Check that agent is not moving while malfunctioning
            assert agent_old_position == env.agents[0].position

        agent_old_position = env.agents[0].position
        total_down_time += remaining_malfunction

    # Check that the appropriate number of malfunctions is achieved
    nr_malfunctions = env.agents[0].malfunction_data['nr_malfunctions']
    assert nr_malfunctions == 23, "Actual {}".format(nr_malfunctions)

    # Check that the agent spent some time malfunctioning
    assert total_down_time > 0
def test_malfunction_process_statistically():
    """Compare the malfunction counters of ten agents over 20 steps with a recorded table."""
    # min_duration == max_duration, so the configured malfunction duration is fixed at 5
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=1 / 5,
        min_duration=5,
        max_duration=5,
    )
    rail, rail_map = make_simple_rail2()
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=10,
                  malfunction_generator_and_process_data=malfunction_from_params(malfunction_parameters),
                  obs_builder_object=SingleAgentNavigationObs())
    env.reset(True, True, False, random_seed=10)
    env.agents[0].target = (0, 0)

    # One row per agent, one column per step: the expected value of
    # malfunction_data['malfunction'] *before* the env is stepped.
    # To regenerate, start from ten empty lists and append the observed values
    # inside the loop below instead of asserting on them.
    expected_malfunction_traces = [
        [0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1, 0, 0, 0, 5, 4],
        [0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1, 0, 0, 5, 4, 3, 2],
        [0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1],
        [0, 0, 5, 4, 3, 2, 1, 0, 0, 5, 4, 3, 2, 1, 0, 5, 4, 3, 2, 1],
        [0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0],
        [5, 4, 3, 2, 1, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 5],
        [5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 4, 3, 2],
        [5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 5, 4],
    ]

    for step in range(20):
        action_dict: Dict[int, RailEnvActions] = {}
        for agent_idx in range(env.get_num_agents()):
            # We randomly select an action
            action_dict[agent_idx] = RailEnvActions(np.random.randint(4))

            observed = env.agents[agent_idx].malfunction_data['malfunction']
            assert observed == expected_malfunction_traces[agent_idx][step]
        env.step(action_dict)
def test_last_malfunction_step():
    """
    Check that the agent's position fraction changes on exactly those steps
    where it is not malfunctioning, and stays the same otherwise.
    """
    rail, rail_map = make_simple_rail2()
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=2),
                  number_of_agents=1,
                  random_seed=1)
    env.reset()
    env.agents[0].speed_data['speed'] = 1. / 3.
    env.agents[0].target = (0, 0)
    env.reset(False, False, True)

    # Force malfunction to be off at beginning and next malfunction to happen in 2 steps
    env.agents[0].malfunction_data['next_malfunction'] = 2
    env.agents[0].malfunction_data['malfunction'] = 0

    for step in range(20):
        # Go forward all the time
        action_dict = {agent.handle: RailEnvActions(2) for agent in env.agents}

        # Snapshot malfunction counter and position fraction around the step
        malfunction_before = env.agents[0].malfunction_data['malfunction']
        pre_position = env.agents[0].speed_data['position_fraction']
        env.step(action_dict)
        malfunction_after = env.agents[0].malfunction_data['malfunction']
        post_position = env.agents[0].speed_data['position_fraction']

        # The agent may move only if it was not malfunctioning before the step
        # and is still not malfunctioning after it
        agent_can_move = malfunction_before < 1 and not malfunction_after > 0

        if agent_can_move:
            assert pre_position != post_position
        else:
            assert post_position == pre_position
def test_initial_malfunction_do_nothing():
    """Replay: an agent malfunctions before departure, is given DO_NOTHING while broken, then starts moving."""
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=70,
        min_duration=2,
        max_duration=5,
    )
    rail, rail_map = make_simple_rail2()
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  malfunction_generator_and_process_data=malfunction_from_params(malfunction_parameters))
    env.reset()
    set_penalties_for_replay(env)

    east = Grid4TransitionsEnum.EAST
    start_cell = (3, 2)

    replay_steps = [
        # a malfunction of 3 is set while the agent is still waiting to depart
        Replay(position=None,
               direction=east,
               action=RailEnvActions.MOVE_FORWARD,
               set_malfunction=3,
               malfunction=3,
               reward=env.step_penalty,
               status=RailAgentStatus.READY_TO_DEPART),
        # still malfunctioning: full step penalty
        Replay(position=start_cell,
               direction=east,
               action=RailEnvActions.DO_NOTHING,
               malfunction=2,
               reward=env.step_penalty,
               status=RailAgentStatus.ACTIVE),
        # the malfunction ends in the next step and we are still at the beginning
        # of the cell; with DO_NOTHING the agent should not move
        Replay(position=start_cell,
               direction=east,
               action=RailEnvActions.DO_NOTHING,
               malfunction=1,
               reward=env.step_penalty,
               status=RailAgentStatus.ACTIVE),
        # we haven't started moving yet --> stay here
        Replay(position=start_cell,
               direction=east,
               action=RailEnvActions.DO_NOTHING,
               malfunction=0,
               reward=env.step_penalty,
               status=RailAgentStatus.ACTIVE),
        # we start to move forward: start penalty + step penalty for speed 1.0
        Replay(position=start_cell,
               direction=east,
               action=RailEnvActions.MOVE_FORWARD,
               malfunction=0,
               reward=env.start_penalty + env.step_penalty * 1.0,
               status=RailAgentStatus.ACTIVE),
        # now in the next cell: step penalty for speed 1.0
        Replay(position=(3, 3),
               direction=east,
               action=RailEnvActions.MOVE_FORWARD,
               malfunction=0,
               reward=env.step_penalty * 1.0,
               status=RailAgentStatus.ACTIVE),
    ]

    replay_config = ReplayConfig(
        replay=replay_steps,
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=start_cell,
        initial_direction=east,
    )
    run_replay_config(env, [replay_config], activate_agents=False)
def test_initial_malfunction_stop_moving():
    """Replay: an agent malfunctions before departure, is given STOP_MOVING while broken, then starts moving."""
    rail, rail_map = make_simple_rail2()
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  obs_builder_object=SingleAgentNavigationObs())
    env.reset()

    first_agent = env.agents[0]
    print(first_agent.initial_position, first_agent.direction, first_agent.position, first_agent.status)

    set_penalties_for_replay(env)

    east = Grid4TransitionsEnum.EAST
    start_cell = (3, 2)

    replay_steps = [
        # a malfunction of 3 is set while the agent is still waiting to depart
        Replay(position=None,
               direction=east,
               action=RailEnvActions.MOVE_FORWARD,
               set_malfunction=3,
               malfunction=3,
               reward=env.step_penalty,
               status=RailAgentStatus.READY_TO_DEPART),
        # still malfunctioning: full step penalty
        Replay(position=start_cell,
               direction=east,
               action=RailEnvActions.DO_NOTHING,
               malfunction=2,
               reward=env.step_penalty,
               status=RailAgentStatus.ACTIVE),
        # the malfunction ends in the next step and we are still at the beginning
        # of the cell; with STOP_MOVING the agent should not move
        Replay(position=start_cell,
               direction=east,
               action=RailEnvActions.STOP_MOVING,
               malfunction=1,
               reward=env.step_penalty,
               status=RailAgentStatus.ACTIVE),
        # we have stopped and do nothing --> should stand still
        Replay(position=start_cell,
               direction=east,
               action=RailEnvActions.DO_NOTHING,
               malfunction=0,
               reward=env.step_penalty,
               status=RailAgentStatus.ACTIVE),
        # we start to move forward: start penalty + step penalty for speed 1.0
        Replay(position=start_cell,
               direction=east,
               action=RailEnvActions.MOVE_FORWARD,
               malfunction=0,
               reward=env.start_penalty + env.step_penalty * 1.0,
               status=RailAgentStatus.ACTIVE),
        # now in the next cell: step penalty for speed 1.0
        Replay(position=(3, 3),
               direction=east,
               action=RailEnvActions.MOVE_FORWARD,
               malfunction=0,
               reward=env.step_penalty * 1.0,
               status=RailAgentStatus.ACTIVE),
    ]

    replay_config = ReplayConfig(
        replay=replay_steps,
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=start_cell,
        initial_direction=east,
    )
    run_replay_config(env, [replay_config], activate_agents=False)
def test_initial_malfunction():
    """Replay: an agent is given MOVE_FORWARD through a malfunction of 3 and moves on once it is over."""
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=1000,
        min_duration=2,
        max_duration=5,
    )
    rail, rail_map = make_simple_rail2()
    env = RailEnv(width=25,
                  height=30,
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(seed=10),
                  number_of_agents=1,
                  malfunction_generator_and_process_data=malfunction_from_params(malfunction_parameters),
                  obs_builder_object=SingleAgentNavigationObs())

    # reset to initialize agents_static
    env.reset(False, False, True, random_seed=10)
    print(env.agents[0].malfunction_data)
    env.agents[0].target = (0, 5)
    set_penalties_for_replay(env)

    east = Grid4TransitionsEnum.EAST
    forward = RailEnvActions.MOVE_FORWARD
    start_cell = (3, 2)

    replay_steps = [
        # a malfunction of 3 is set: full step penalty when malfunctioning
        Replay(position=start_cell,
               direction=east,
               action=forward,
               set_malfunction=3,
               malfunction=3,
               reward=env.step_penalty),
        # still malfunctioning: full step penalty
        Replay(position=start_cell,
               direction=east,
               action=forward,
               malfunction=2,
               reward=env.step_penalty),
        # the malfunction ends in the next step and we are still at the beginning
        # of the cell; with MOVE_FORWARD the agent should restart and move on
        Replay(position=start_cell,
               direction=east,
               action=forward,
               malfunction=1,
               reward=env.step_penalty),
        # malfunctioning ends: starting and running at speed 1.0
        Replay(position=start_cell,
               direction=east,
               action=forward,
               malfunction=0,
               reward=env.start_penalty + env.step_penalty * 1.0),
        # running at speed 1.0
        Replay(position=(3, 3),
               direction=east,
               action=forward,
               malfunction=0,
               reward=env.step_penalty),
    ]

    replay_config = ReplayConfig(
        replay=replay_steps,
        speed=env.agents[0].speed_data['speed'],
        target=env.agents[0].target,
        initial_position=start_cell,
        initial_direction=east,
    )
    run_replay_config(env, [replay_config])
def test_dummy_predictor(rendering=False):
    """Compare the DummyPredictorForRailEnv output for one agent with a hand-written expectation.

    Each prediction row is read as (time offset, row, column, direction, action).

    Parameters
    ----------
    rendering : bool
        If True, render the env and wait for keyboard input before checking.
    """
    rail, rail_map = make_simple_rail2()
    tree_obs = TreeObsForRailEnv(max_depth=2,
                                 predictor=DummyPredictorForRailEnv(max_depth=10))
    env = RailEnv(width=rail_map.shape[1],
                  height=rail_map.shape[0],
                  rail_generator=rail_from_grid_transition_map(rail),
                  schedule_generator=random_schedule_generator(),
                  number_of_agents=1,
                  obs_builder_object=tree_obs)
    env.reset()

    # set initial position and direction for testing...
    env.agents[0].initial_position = (5, 6)
    env.agents[0].initial_direction = 0
    env.agents[0].direction = 0
    env.agents[0].target = (3, 0)

    env.reset(False, False)
    env.set_agent_active(env.agents[0])

    if rendering:
        renderer = RenderTool(env, gl="PILSVG")
        renderer.render_env(show=True, show_observations=False)
        input("Continue?")

    # split the prediction rows of agent 0 into their columns
    agent_predictions = env.obs_builder.predictor.get(None)[0]
    time_offsets = np.array([[row[0]] for row in agent_predictions])
    positions = np.array([list(row[1:3]) for row in agent_predictions])
    directions = np.array([[row[3]] for row in agent_predictions])
    actions = np.array([[row[4]] for row in agent_predictions])

    # expected values
    expected_positions = np.array([
        [5., 6.],
        [4., 6.],
        [3., 6.],
        [3., 5.],
        [3., 4.],
        [3., 3.],
        [3., 2.],
        [3., 1.],
        # at target (3,0): stay in this position from here on
        [3., 0.],
        [3., 0.],
        [3., 0.],
    ])
    # three steps with direction 0, then direction 3 for the remaining eight
    expected_directions = np.array([[0.]] * 3 + [[3.]] * 8)
    # one prediction per time step 0..10
    expected_time_offsets = np.array([[float(offset)] for offset in range(11)])
    # initial 0, eight steps with action 2 (the last one reaches the target),
    # then action 4 once stopped at the target
    expected_actions = np.array([[0.]] + [[2.]] * 8 + [[4.]] * 2)

    assert np.array_equal(positions, expected_positions)
    assert np.array_equal(directions, expected_directions)
    assert np.array_equal(time_offsets, expected_time_offsets)
    assert np.array_equal(actions, expected_actions)
def test_seeding_and_malfunction():
    """Check that two identically built and seeded envs stay in lockstep for seeds 1..99.

    For every seed both envs must agree on the initial positions, on every
    per-agent reward and done flag over ten steps of shared random actions,
    and on the final agent positions.
    """
    rail, rail_map = make_simple_rail2()

    # NOTE(review): this dict is never passed to either env below, so the envs
    # are built without it -- TODO confirm whether it was meant to be wired in.
    stochastic_data = {
        'prop_malfunction': 0.4,
        'malfunction_rate': 2,
        'min_duration': 10,
        'max_duration': 10
    }

    def build_env():
        # Both envs are configured identically, including the observation builder
        return RailEnv(width=25,
                       height=30,
                       rail_generator=rail_from_grid_transition_map(rail),
                       schedule_generator=random_schedule_generator(),
                       number_of_agents=10,
                       obs_builder_object=GlobalObsForRailEnv())

    for tests in range(1, 100):
        env = build_env()
        env2 = build_env()

        env.reset(True, False, True, random_seed=tests)
        env2.reset(True, False, True, random_seed=tests)

        # Both environments must produce the same initial start positions
        for handle in range(10):
            assert env.agents[handle].initial_position == env2.agents[handle].initial_position

        action_dict = {}
        for step in range(10):
            for a in range(env.get_num_agents()):
                action_dict[a] = np.random.randint(4)

            _, reward1, done1, _ = env.step(action_dict)
            _, reward2, done2, _ = env2.step(action_dict)
            for a in range(env.get_num_agents()):
                assert reward1[a] == reward2[a]
                assert done1[a] == done2[a]

        # Both environments must end up in the same positions
        for handle in range(10):
            assert env.agents[handle].position == env2.agents[handle].position