def test_snake_eaten_adjacent_tile_same_size(self):
        '''
        Tests that if two snakes of the same size eat each other, they both die
        see: outcome option = "Snake was eaten - adjacent tile" in snake_gym._did_snake_collide
        '''
        snake_location = [(4, 1), (4, 12)]
        food_location = [(4, 2), (4, 9), (4, 3), (4, 8), (0, 0), (0, 0),
                         (0, 0)]
        env = BattlesnakeGym(map_size=(13, 13),
                             number_of_snakes=2,
                             snake_spawn_locations=snake_location,
                             food_spawn_locations=food_location,
                             verbose=True)

        env.food.max_turns_to_next_food_spawn = 2  # Hack to make sure that food is spawned every turn

        actions = [[Snake.RIGHT, Snake.LEFT], [Snake.RIGHT, Snake.LEFT],
                   [Snake.RIGHT, Snake.LEFT], [Snake.RIGHT, Snake.LEFT],
                   [Snake.RIGHT, Snake.LEFT], [Snake.RIGHT, Snake.LEFT],
                   [Snake.RIGHT, Snake.LEFT]]

        simulate_snake(env,
                       actions,
                       render=should_render(),
                       break_with_done=False)

        snakes_alive = [snake.is_alive() for snake in env.snakes.get_snakes()]
        self.assertTrue(np.sum(snakes_alive) == 0)

        env.close()
    def test_random_spawning(self):
        '''
        Test that snakes and food are correct when randomly spawned
        '''
        env = BattlesnakeGym(map_size=(9, 9), number_of_snakes=1)

        # Check that a snake is spawned on the board
        self.assertTrue(len(env.snakes.snakes) > 0)

        # Check that there is a food on the board
        self.assertTrue(env.food.locations_map.sum() > 0)
        env.close()
    def test_snake_health(self):
        '''
        Test that the snake dies after moving Snake.FULL_HEALTH (100) times without eating, i.e., when its health reaches 0
        '''

        snake_location = [(0, 0)]
        food_location = [(5, 5) for _ in range(0, 200)]
        env = BattlesnakeGym(map_size=(9, 10),
                             number_of_snakes=1,
                             snake_spawn_locations=snake_location,
                             food_spawn_locations=food_location,
                             verbose=True)

        actions = [[Snake.RIGHT]]
        simulate_snake(env, actions, render=should_render())
        self.assertTrue(
            env.snakes.get_snakes()[0].health == Snake.FULL_HEALTH - 1)

        # Walk the snake in a repeating square (8 steps per side) so it never
        # hits a wall or itself while its health ticks down.
        actions = []
        for i in range(1, Snake.FULL_HEALTH - 1):
            phase = i % 32
            if phase < 8:
                actions.append([Snake.RIGHT])
            elif phase < 16:
                actions.append([Snake.DOWN])
            elif phase < 24:
                actions.append([Snake.LEFT])
            else:
                actions.append([Snake.UP])
        simulate_snake(env,
                       actions,
                       render=should_render(),
                       break_with_done=False)
        self.assertTrue(env.snakes.get_snakes()[0].health == 1)
        self.assertTrue(env.snakes.get_snakes()[0].is_alive())

        actions = [[Snake.RIGHT]]
        simulate_snake(env, actions, render=should_render())
        self.assertTrue(env.snakes.get_snakes()[0].health == 0)
        self.assertFalse(env.snakes.get_snakes()[0].is_alive())

        # Check snake died
        actions = [[Snake.RIGHT]]
        simulate_snake(env, actions, render=should_render())
        self.assertTrue(np.sum(env.snakes.get_snake_51_map()) == 0)
        env.close()
def grow_two_snakes(snake_starting_positions):
    '''
    Helper function to grow two snakes from the given snake_starting_positions.
    '''
    snake_location = snake_starting_positions
    food_location = [(2, 0), (2, 2), (4, 2), (2, 4), (4, 6), (7, 5), (7, 4),
                     (7, 3), (7, 2)] + [(0, 0)] * 100
    env = BattlesnakeGym(map_size=(9, 9),
                         number_of_snakes=2,
                         snake_spawn_locations=snake_location,
                         food_spawn_locations=food_location,
                         verbose=True)
    env.food.max_turns_to_next_food_spawn = 2  # Hack to make sure that food is spawned every turn

    actions_snake1 = [[Snake.DOWN], [Snake.DOWN], [Snake.RIGHT], [Snake.RIGHT],
                      [Snake.DOWN], [Snake.DOWN], [Snake.RIGHT], [Snake.RIGHT],
                      [Snake.UP], [Snake.UP], [Snake.RIGHT], [Snake.RIGHT],
                      [Snake.DOWN], [Snake.DOWN], [Snake.RIGHT], [Snake.RIGHT],
                      [Snake.DOWN], [Snake.DOWN], [Snake.DOWN]]

    actions_snake2 = ([[Snake.RIGHT]] * 7 + [[Snake.DOWN]] +
                      [[Snake.LEFT]] * 7 + [[Snake.DOWN]] + [[Snake.RIGHT]] * 3)
    tmp_actions = list(zip(actions_snake1, actions_snake2))
    actions = []
    for action in tmp_actions:
        actions.append(np.array([action[0], action[1]]))

    simulate_snake(env, actions, render=should_render(), break_with_done=False)
    return env
def grow_snake():
    '''
    Helper function to grow a snake.
    '''
    snake_location = [(0, 0)]
    food_location = [(2, 0), (4, 2), (2, 4), (4, 6), (6, 8)] + [(0, 0)] * 18
    env = BattlesnakeGym(map_size=(9, 9),
                         number_of_snakes=1,
                         snake_spawn_locations=snake_location,
                         food_spawn_locations=food_location,
                         verbose=True)

    env.food.max_turns_to_next_food_spawn = 2  # Hack to make sure that food is spawned every turn

    actions = [[Snake.DOWN], [Snake.DOWN], [Snake.RIGHT], [Snake.RIGHT],
               [Snake.DOWN], [Snake.DOWN], [Snake.RIGHT], [Snake.RIGHT],
               [Snake.UP], [Snake.UP], [Snake.RIGHT], [Snake.RIGHT],
               [Snake.DOWN], [Snake.DOWN], [Snake.RIGHT], [Snake.RIGHT],
               [Snake.DOWN], [Snake.DOWN], [Snake.DOWN]]

    simulate_snake(env, actions, render=should_render(), break_with_done=False)
    return env
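
# An illustrative usage sketch for the grow helpers above (grow_two_snakes is
# used analogously); only loose checks are made because the exact body length
# depends on how many of the spawned food items were reached.
def example_grow_snake_usage():
    env = grow_snake()
    snake = env.snakes.get_snakes()[0]
    assert snake.is_alive()
    # The snake spawned on a single cell, so eating food along the scripted path
    # should leave it occupying more than one location.
    assert len(snake.locations) > 1
    env.close()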
    def test_states(self):
        '''
        Test that the state returned is correct
        '''
        snake_location = [(0, 0)]
        food_location = [(1, 0), (1, 2), (2, 0), (2, 0), (2, 0)]
        env = BattlesnakeGym(map_size=(3, 3),
                             number_of_snakes=1,
                             snake_spawn_locations=snake_location,
                             food_spawn_locations=food_location)
        env.food.max_turns_to_next_food_spawn = 2  # Hack to make sure that food is spawned every turn

        actions = [[Snake.DOWN], [Snake.RIGHT], [Snake.RIGHT], [Snake.DOWN],
                   [Snake.LEFT]]
        observation, _, _, _ = simulate_snake(env,
                                              actions,
                                              render=False,
                                              break_with_done=False)

        food_state = np.zeros(shape=(3, 3), dtype=np.uint8)
        food_state[1, 2] = 1

        snake_state = np.zeros(shape=(3, 3), dtype=np.uint8)
        snake_state[1, 1] = 1
        snake_state[2, 2] = 1
        snake_state[1, 2] = 1
        snake_state[2, 1] = 5
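        # With the "51s"-style observation encoding, body segments appear as 1
        # and the head as 5 (presumably the source of the "51s" name); channel 0
        # holds the food layer and channel 1 this snake's layer.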

        self.assertTrue(np.array_equal(observation[:, :, 0], food_state))
        self.assertTrue(np.array_equal(observation[:, :, 1], snake_state))
    def test_spawning(self):
        '''
        Test that snakes and food are correct when deterministically spawned (for testing)
        '''
        snake_location = [(4, 4)]
        food_location = [(5, 5)]
        env = BattlesnakeGym(map_size=(9, 9),
                             number_of_snakes=1,
                             snake_spawn_locations=snake_location,
                             food_spawn_locations=food_location)

        # Check that the snake is spawned correctly
        self.assertTrue(
            np.array_equal(env.snakes.snakes[0].locations, snake_location))

        # Check that food is spawned correctly
        self.assertTrue(env.food.locations_map[food_location[0][0],
                                               food_location[0][1]] == 1)
        env.close()
    def test_snake_move(self):
        '''
        Test that the snake moves correctly.
        Spawn a snake in a given location and move it one space in each
        direction: UP, DOWN, LEFT, RIGHT.
        '''
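        # Locations are (row, column) tuples: UP decreases the row index, DOWN
        # increases it, LEFT decreases the column index and RIGHT increases it,
        # as the assertions below illustrate.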
        snake_location = [(4, 4)]
        food_location = [(5, 5)]
        env = BattlesnakeGym(map_size=(9, 10),
                             number_of_snakes=1,
                             snake_spawn_locations=snake_location,
                             food_spawn_locations=food_location)

        actions = [[Snake.UP]]
        simulate_snake(env, actions, render=should_render())
        snake_location_moved = [(4, 4), (3, 4)]
        self.assertTrue(
            np.array_equal(env.snakes.get_snakes()[0].locations,
                           snake_location_moved))

        actions = [[Snake.LEFT]]
        simulate_snake(env, actions, render=should_render())
        snake_location_moved = [(4, 4), (3, 4), (3, 3)]
        self.assertTrue(
            np.array_equal(env.snakes.get_snakes()[0].locations,
                           snake_location_moved))

        actions = [[Snake.DOWN]]
        simulate_snake(env, actions, render=should_render())
        snake_location_moved = [(3, 4), (3, 3), (4, 3)]
        self.assertTrue(
            np.array_equal(env.snakes.get_snakes()[0].locations,
                           snake_location_moved))

        actions = [[Snake.RIGHT]]
        simulate_snake(env, actions, render=should_render())
        snake_location_moved = [(3, 3), (4, 3), (4, 4)]
        self.assertTrue(
            np.array_equal(env.snakes.get_snakes()[0].locations,
                           snake_location_moved))
        env.close()
Example #10
def run(args):
    max_time_steps = 1000

    map_size = tuple(args.map_size)
    number_of_snakes = args.number_of_snakes
    state_shape = (map_size[0], map_size[1], (1 + number_of_snakes))

    env = BattlesnakeGym(map_size=map_size, number_of_snakes=number_of_snakes)

    agents = []

    for i in range(number_of_snakes):
        agent = Agent_mxnet(state_shape=state_shape, action_size=4, seed=0)
        agent.qnetwork_local.load_parameters(args.model_name.format(i),
                                             ctx=ctx)
        agents.append(agent)

    state = env.reset()
    for t in range(max_time_steps):
        actions = []
        for i in range(number_of_snakes):
            state_agent_i = sort_states_for_snake_id(state, i + 1)
            action = agents[i].act(state_agent_i, eps=0)
            actions.append(action)
        next_state, reward, done, _ = env.step(np.array(actions))
        env.render(mode=args.render_mode)
        time.sleep(0.2)
        state = next_state
        if done:
            break
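
# An illustrative sketch of invoking run() above. The attribute names on `args`
# mirror those accessed inside run() (map_size, number_of_snakes, model_name,
# render_mode); the concrete values and the parameter-file name are assumptions,
# and the module is expected to provide Agent_mxnet, ctx and trained weights.
def example_run_usage():
    from argparse import Namespace
    example_args = Namespace(
        map_size=[11, 11],                      # converted to a (height, width) tuple
        number_of_snakes=4,
        model_name="local_qnetwork_{}.params",  # formatted with each snake's index
        render_mode="human")                    # whichever mode env.render accepts
    run(example_args)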
def test_gym_performance(map_sizes, number_of_snakes):
    """
    Measure the performance of the gym in steps per seconds

    :param map_sizes: [()]
    :param number_of_snakes: []
    :return:
    """

    max_turns_num = 100
    for map_size in map_sizes:
        for num_snakes in number_of_snakes:
            # Create the snake spawn locations one after another
            snake_locations = []
            i = 0
            j = 0
            for _ in range(num_snakes):
                snake_locations.append([i, j])
                if j >= map_size[1] - 2:
                    i += 1
                    j = 0
                else:
                    j += 1
                if i >= map_size[0] - 2:
                    raise Exception(
                        "Incompatible map size and number of snakes")

            # Create the food always in the same spot
            food_location = ([(map_size[0] - 1, map_size[1] - 1)]
                             * max_turns_num * max_turns_num)
            print(snake_locations)
            # Create the gym
            env = BattlesnakeGym(map_size=map_size,
                                 number_of_snakes=num_snakes,
                                 snake_spawn_locations=snake_locations,
                                 food_spawn_locations=food_location)

            actions = [[Snake.RIGHT] * num_snakes, [Snake.DOWN] * num_snakes,
                       [Snake.LEFT] * num_snakes, [Snake.UP] * num_snakes
                       ] * max_turns_num

            tic = time.time()
            _, _, _, info = simulate_snake(env, actions, render=False)
            toc = time.time()

            print(
                "Map Size {}, Num Snake {}, Num Turns {}, Total time: {:.4f}s, Steps per seconds {:.4f}"
                .format(map_size, num_snakes, info['current_turn'], toc - tic,
                        info['current_turn'] / (toc - tic)))
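
# Example invocation of the benchmark above; the map sizes and snake counts are
# illustrative choices, not values prescribed by the gym.
def example_benchmark_run():
    test_gym_performance(map_sizes=[(11, 11), (19, 19)],
                         number_of_snakes=[2, 4])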
class MultiAgentBattlesnake(MultiAgentEnv):

    MAX_MAP_HEIGHT = 21
        
    def __init__(self, num_agents, map_height, heuristics, rewards=SimpleRewards()):
        observation_type = "max-bordered-51s"
         
        self.env = BattlesnakeGym(
            observation_type=observation_type,
            number_of_snakes=num_agents, 
            map_size=(map_height, map_height), rewards=rewards)
        
        self.observation_height = self.MAX_MAP_HEIGHT
        self.action_space = self.env.action_space[0]
        
        gym_observation_space = gym.spaces.Box(low=-1.0, high=5.0,
                                               shape=(self.observation_height, self.observation_height, 6), dtype=np.float32)

        self.observation_space = gym.spaces.Dict({
            "action_mask": gym.spaces.Box(0, 1, shape=(4,),
                dtype=np.float32),
            "state": gym_observation_space})

        self.num_agents = num_agents
        self.observation_type = observation_type
        self.old_obs1 = {}
        self.heuristics = heuristics
        if len(self.heuristics) > 0:
            self.battlesnake_heuristics = MyBattlesnakeHeuristics()
            self.heuristics_list = []
            for heuristic_name in self.heuristics:
                if heuristic_name == "banned_forbidden_moves":
                    self.heuristics_list.append(self.battlesnake_heuristics.banned_forbidden_moves)
                elif heuristic_name == "banned_wall_hits":
                    self.heuristics_list.append(self.battlesnake_heuristics.banned_wall_hits)
        self.rewards = rewards
        
    def set_effective_map_size(self, eff_map_size):
        self.__init__(self.num_agents, eff_map_size, self.heuristics, self.rewards)
        self.reset()

    def reset(self):
        self.mask = {}
        new_obs, _, _, info = self.env.reset()

        obs = {}

        # add empty map placeholders for use until we've seen 2 steps
        empty_map = np.zeros((self.observation_height, self.observation_height, 3))
        
        new_obs = np.array(new_obs, dtype=np.float32)

        for i in range(self.num_agents):
            agent_id = "agent_{}".format(i)
            
            obs_i = sort_states_for_snake_id(new_obs, i+1)
            
            merged_map = np.concatenate((empty_map, obs_i), axis=-1)

            if len(self.heuristics) > 0:
                health = {k: 100 for k in range(self.num_agents)}
                mask = self.battlesnake_heuristics.get_action_masks_from_functions(
                    obs_i, i, 0, health, self.env, 
                    functions=self.heuristics_list)
            else:
                mask = np.array([1, 1, 1, 1])
            obs[agent_id] = {"state": merged_map, "action_mask": mask}
            
            self.mask[agent_id] = obs[agent_id]["action_mask"]
            self.old_obs1[agent_id] = obs_i 
            
        return obs

    def step(self, action_dict):
        actions = []

        for key, value in sorted(action_dict.items()):
            actions.append(value)

        o, r, d, info = self.env.step(actions)
        rewards = {}
        obs = {}
        infos = {}

        for i, key in enumerate(sorted(action_dict.keys())):            
            old_obs1 = self.old_obs1[key]
            
            obs_i = np.array(o, dtype=np.float32)
            obs_i = sort_states_for_snake_id(obs_i, i+1)
            
            merged_map = np.concatenate((old_obs1, obs_i), axis=-1)
            
            infos[key] = info
            rewards[key] = r[i]
            if len(self.heuristics) > 0 and self.env.snakes.get_snakes()[i].is_alive():
                turn_count = info["current_turn"]+1
                health = info["snake_health"]

                mask = self.battlesnake_heuristics.get_action_masks_from_functions(
                        obs_i, i, turn_count, health, self.env, 
                        functions=self.heuristics_list)
                                
            else:
                mask = np.array([1, 1, 1, 1])

            obs[key] = {"state": merged_map, "action_mask": mask}
            self.old_obs1[key] = np.array(obs_i, dtype=np.float32)
            
            self.mask[key] = obs[key]["action_mask"]

        dead_count = 0
        for x in range(self.num_agents):
            if d[x]:
                dead_count += 1

        dones = {'__all__': dead_count >= self.num_agents-1}

        return obs, rewards, dones, infos
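
# An illustrative usage sketch for the wrapper above (values are assumptions):
# two agents on an 11x11 board with the wall-hit heuristic enabled. Actions are
# assumed to be integer indices into the underlying 4-way action space.
def example_multiagent_usage():
    env = MultiAgentBattlesnake(num_agents=2, map_height=11,
                                heuristics=["banned_wall_hits"])
    obs = env.reset()
    obs, rewards, dones, infos = env.step({"agent_0": 0, "agent_1": 1})
    return obs, rewards, dones, infos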
Example #14
def run(seed, args):
    print("Running with seed = {}".format(seed))
    map_size = json.loads(args.map_size)

    # Initialise logging
    if args.model_dir is None:
        # Check if the model is running in Sagemaker
        if "SM_MODEL_DIR" in os.environ:
            model_dir = os.environ['SM_MODEL_DIR']
        else:
            model_dir = "params"
    else:
        model_dir = args.model_dir

    # Check if the model is running in Sagemaker
    load = args.load
    if 'SM_CHANNEL_WEIGHTS' in os.environ and load is not None:
        load = os.environ['SM_CHANNEL_WEIGHTS'] + "//" + load

    if args.writer:
        writer = SummaryWriter("logs/{}-seed{}".format(args.run_name, seed),
                               verbose=False)
    else:
        writer = None

    # Initialise the environment
    env = BattlesnakeGym(map_size=map_size,
                         observation_type=args.snake_representation)
    env.seed(seed)

    # Initialise agent
    if args.state_type == "layered":
        state_depth = 1 + args.number_of_snakes
    elif args.state_type == "one_versus_all":
        state_depth = 3
    else:
        raise ValueError("Unknown state_type: {}".format(args.state_type))

    if "bordered" in args.snake_representation:
        state_shape = (map_size[0] + 2, map_size[1] + 2, state_depth)
    else:
        state_shape = (map_size[0], map_size[1], state_depth)

    agent_params = (
        seed,
        model_dir,
        load,
        args.load_only_conv_layers,
        args.models_to_save,
        # State configurations
        args.state_type,
        state_shape,
        args.number_of_snakes,

        # Learning configurations
        args.buffer_size,
        args.update_every,
        args.lr_start,
        args.lr_step,
        args.lr_factor,
        args.gamma,
        args.tau,
        args.batch_size,

        # Network configurations
        args.qnetwork_type,
        args.sequence_length,
        args.starting_channels,
        args.number_of_conv_layers,
        args.number_of_dense_layers,
        args.number_of_hidden_states,
        args.depthS,
        args.depth,
        args.kernel_size,
        args.repeat_size,
        args.activation_type)

    agent = MultiAgentsCollection(*agent_params)

    trainer(env, agent, args.number_of_snakes, args.run_name, args.episodes,
            args.max_t, args.warmup, args.eps_start, args.eps_end,
            args.eps_decay, args.print_score_steps, args.save_only_best_models,
            args.save_model_every, args.render_steps, args.should_render,
            args.writer, args.print_progress)
Example #15
class MultiAgentBattlesnake(MultiAgentEnv):

    MAX_MAP_HEIGHT = 21
    def __init__(self, num_agents, map_height):
        observation_type = "max-bordered-51s"
        self.env = BattlesnakeGym(
            observation_type=observation_type,
            number_of_snakes=num_agents, 
            map_size=(map_height, map_height))
        
        if "bordered" in observation_type:
            if "max-bordered" in observation_type:
                self.observation_height = self.MAX_MAP_HEIGHT
            else: # If only bordered with 2 rows of -1
                self.observation_height = map_height + 2
        else: # Flat without border
            self.observation_height = map_height

        self.action_space = self.env.action_space[0]

        self.observation_space = gym.spaces.Box(
            low=-1.0, high=5.0,
            shape=(self.observation_height, self.observation_height, 6),
            dtype=np.float32)
        self.num_agents = num_agents
        self.observation_type = observation_type
        self.old_obs1 = {}

    def set_effective_map_size(self, eff_map_size):
        self.__init__(self.num_agents, eff_map_size)
        self.reset()

    def reset(self):
        new_obs, _, _, info = self.env.reset()

        obs = {}

        # add empty map placeholders for use until we've seen 2 steps
        empty_map = np.zeros((self.observation_height, self.observation_height, 3))
        
        new_obs = np.array(new_obs, dtype=np.float32)

        for i in range(self.num_agents):
            agent_id = "agent_{}".format(i)
            
            obs_i = sort_states_for_snake_id(new_obs, i+1)
            
            merged_map = np.concatenate((empty_map, obs_i), axis=-1)

            obs[agent_id] = merged_map
                
            self.old_obs1[agent_id] = obs_i 

        return obs

    def render(self):
        self.env.render()

    def step(self, action_dict):
        actions = []

        for key, value in sorted(action_dict.items()):
            actions.append(value)

        o, r, d, info = self.env.step(actions)
        rewards = {}
        obs = {}
        infos = {}

        for i, key in enumerate(sorted(action_dict.keys())):            
            old_obs1 = self.old_obs1[key]
            
            o_i = np.array(o, dtype=np.float32)
            o_i = sort_states_for_snake_id(o_i, i+1)
            
            merged_map = np.concatenate((old_obs1, o_i), axis=-1)
            
            infos[key] = info
            obs[key] = merged_map
            if self.num_agents > 1:
                rewards[key] = r[i]
            else:
                rewards[key] = r

            self.old_obs1[key] = np.array(o_i, dtype=np.float32)

        dead_count = 0
        for x in range(self.num_agents):
            if d[x]:
                dead_count += 1

        dones = {'__all__': dead_count >= self.num_agents-1}

        return obs, rewards, dones, infos
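
# A sketch of the curriculum-style usage that set_effective_map_size() enables
# (board sizes are illustrative): the wrapper re-runs __init__ with the new
# height while keeping the same number of agents, then resets itself.
def example_curriculum_usage():
    env = MultiAgentBattlesnake(num_agents=2, map_height=7)
    env.reset()
    env.set_effective_map_size(11)
    return env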