def test_snake_eaten_adjacent_tile_same_size(self):
    '''
    Tests that if two snakes of the same size eat each other, they both die

    see: outcome option = "Snake was eaten - adjacent tile"
    in snake_gym._did_snake_collide
    '''
    spawn_points = [(4, 1), (4, 12)]
    food_points = [(4, 2), (4, 9), (4, 3), (4, 8)] + [(0, 0)] * 3
    env = BattlesnakeGym(map_size=(13, 13),
                         number_of_snakes=2,
                         snake_spawn_locations=spawn_points,
                         food_spawn_locations=food_points,
                         verbose=True)
    # Hack to make sure that food is spawned every turn
    env.food.max_turns_to_next_food_spawn = 2

    # Seven turns in which the first snake goes RIGHT and the second LEFT.
    head_on_moves = [[Snake.RIGHT, Snake.LEFT] for _ in range(7)]
    simulate_snake(env, head_on_moves,
                   render=should_render(),
                   break_with_done=False)

    # Neither snake may be alive at the end of the sequence.
    alive_flags = [snake.is_alive() for snake in env.snakes.get_snakes()]
    self.assertTrue(np.sum(alive_flags) == 0)
    env.close()
def __init__(self, num_agents, map_height, heuristics, rewards=SimpleRewards()):
    '''
    Create the wrapped BattlesnakeGym and declare the RL spaces.

    :param num_agents: number of snakes handed to BattlesnakeGym
    :param map_height: side length of the (square) map
    :param heuristics: collection of heuristic names; only
        "banned_forbidden_moves" and "banned_wall_hits" are picked up,
        any other name is ignored
    :param rewards: rewards object forwarded to BattlesnakeGym
    '''
    observation_type = "max-bordered-51s"
    self.num_agents = num_agents
    self.observation_type = observation_type
    self.heuristics = heuristics
    self.rewards = rewards
    self.old_obs1 = {}

    self.env = BattlesnakeGym(
        observation_type=observation_type,
        number_of_snakes=num_agents,
        map_size=(map_height, map_height),
        rewards=rewards)

    # Observations always use the maximum map size, whatever map_height is.
    self.observation_height = self.MAX_MAP_HEIGHT
    self.action_space = self.env.action_space[0]

    state_space = gym.spaces.Box(
        low=-1.0, high=5.0,
        shape=(self.observation_height, self.observation_height, 6),
        dtype=np.float32)
    mask_space = gym.spaces.Box(0, 1, shape=(4,), dtype=np.float32)
    self.observation_space = gym.spaces.Dict({
        "action_mask": mask_space,
        "state": state_space})

    if len(self.heuristics) > 0:
        self.battlesnake_heuristics = MyBattlesnakeHeuristics()
        # The heuristic name doubles as the method name on the helper.
        known_heuristics = ("banned_forbidden_moves", "banned_wall_hits")
        self.heuristics_list = [
            getattr(self.battlesnake_heuristics, heuristic_name)
            for heuristic_name in self.heuristics
            if heuristic_name in known_heuristics]
def test_random_spawning(self):
    '''
    Check that a randomly spawned board holds a snake and some food
    '''
    env = BattlesnakeGym(map_size=(9, 9), number_of_snakes=1)

    # At least one snake object must exist
    spawned_snakes = env.snakes.snakes
    self.assertTrue(len(spawned_snakes) > 0)

    # The food map must have a positive total
    food_total = env.food.locations_map.sum()
    self.assertTrue(food_total > 0)

    env.close()
def test_snake_health(self):
    '''
    Test that snake dies after moving 100 times. i.e., health == 0
    '''
    env = BattlesnakeGym(map_size=(9, 10),
                         number_of_snakes=1,
                         snake_spawn_locations=[(0, 0)],
                         food_spawn_locations=[(5, 5)] * 200,
                         verbose=True)

    def the_snake():
        # Re-fetched on every use, exactly like the inline lookups it replaces.
        return env.snakes.get_snakes()[0]

    # One move costs one health point.
    simulate_snake(env, [[Snake.RIGHT]], render=should_render())
    self.assertTrue(the_snake().health == Snake.FULL_HEALTH - 1)

    # The direction is chosen from i % 32 in bands of eight:
    # 0-7 RIGHT, 8-15 DOWN, 16-23 LEFT, 24-31 UP.
    lap = (Snake.RIGHT, Snake.DOWN, Snake.LEFT, Snake.UP)
    actions = [[lap[(i % 32) // 8]]
               for i in range(1, Snake.FULL_HEALTH - 1)]
    simulate_snake(env, actions, render=should_render(),
                   break_with_done=False)
    self.assertTrue(the_snake().health == 1)
    self.assertTrue(the_snake().is_alive())

    # The next move uses up the last health point.
    simulate_snake(env, [[Snake.RIGHT]], render=should_render())
    self.assertTrue(the_snake().health == 0)
    self.assertFalse(the_snake().is_alive())

    # Check snake died
    simulate_snake(env, [[Snake.RIGHT]], render=should_render())
    self.assertTrue(np.sum(env.snakes.get_snake_51_map()) == 0)
    env.close()
def grow_two_snakes(snake_starting_positions):
    '''
    Helper function to grow two snakes based on the snake_starting_position.

    Returns the environment after the scripted moves were simulated.
    '''
    scripted_food = [(2, 0), (2, 2), (4, 2), (2, 4), (4, 6),
                     (7, 5), (7, 4), (7, 3), (7, 2)]
    food_location = scripted_food + [(0, 0)] * 100

    env = BattlesnakeGym(map_size=(9, 9),
                         number_of_snakes=2,
                         snake_spawn_locations=snake_starting_positions,
                         food_spawn_locations=food_location,
                         verbose=True)
    # Hack to make sure that food is spawned every turn
    env.food.max_turns_to_next_food_spawn = 2

    first_snake_path = (
        Snake.DOWN, Snake.DOWN, Snake.RIGHT, Snake.RIGHT,
        Snake.DOWN, Snake.DOWN, Snake.RIGHT, Snake.RIGHT,
        Snake.UP, Snake.UP, Snake.RIGHT, Snake.RIGHT,
        Snake.DOWN, Snake.DOWN, Snake.RIGHT, Snake.RIGHT,
        Snake.DOWN, Snake.DOWN, Snake.DOWN)
    first_snake_moves = [[move] for move in first_snake_path]
    second_snake_moves = ([[Snake.RIGHT]] * 7 + [[Snake.DOWN]]
                          + [[Snake.LEFT]] * 7 + [[Snake.DOWN]]
                          + [[Snake.RIGHT]] * 3)

    # One array per turn, holding the move of each snake.
    actions = [np.array([first_move, second_move])
               for first_move, second_move
               in zip(first_snake_moves, second_snake_moves)]

    simulate_snake(env, actions, render=should_render(),
                   break_with_done=False)
    return env
def grow_snake():
    '''
    Helper function to grow a snake.

    Returns the environment after the scripted moves were simulated.
    '''
    food_location = [(2, 0), (4, 2), (2, 4), (4, 6), (6, 8),
                     (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0),
                     (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0),
                     (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0)]
    env = BattlesnakeGym(map_size=(9, 9),
                         number_of_snakes=1,
                         snake_spawn_locations=[(0, 0)],
                         food_spawn_locations=food_location,
                         verbose=True)
    # Hack to make sure that food is spawned every turn
    env.food.max_turns_to_next_food_spawn = 2

    path = (
        Snake.DOWN, Snake.DOWN, Snake.RIGHT, Snake.RIGHT,
        Snake.DOWN, Snake.DOWN, Snake.RIGHT, Snake.RIGHT,
        Snake.UP, Snake.UP, Snake.RIGHT, Snake.RIGHT,
        Snake.DOWN, Snake.DOWN, Snake.RIGHT, Snake.RIGHT,
        Snake.DOWN, Snake.DOWN, Snake.DOWN)
    # One single-entry action list per turn (there is only one snake).
    actions = [[move] for move in path]

    simulate_snake(env, actions, render=should_render(),
                   break_with_done=False)
    return env
def test_states(self):
    '''
    Test that the state returned is correct

    Plays five scripted moves on a 3x3 map and compares channel 0 (food)
    and channel 1 (snake) of the final observation with hand-built maps.
    '''
    snake_location = [(0, 0)]
    food_location = [(1, 0), (1, 2), (2, 0), (2, 0), (2, 0)]
    env = BattlesnakeGym(map_size=(3, 3),
                         number_of_snakes=1,
                         snake_spawn_locations=snake_location,
                         food_spawn_locations=food_location)
    # Hack to make sure that food is spawned every turn
    env.food.max_turns_to_next_food_spawn = 2

    actions = [[Snake.DOWN], [Snake.RIGHT], [Snake.RIGHT],
               [Snake.DOWN], [Snake.LEFT]]

    # Close the environment even when an assertion fails, as the other
    # tests in this file do on their success path.
    try:
        observation, _, _, _ = simulate_snake(env, actions,
                                              render=False,
                                              break_with_done=False)

        food_state = np.zeros(shape=(3, 3), dtype=np.uint8)
        food_state[1, 2] = 1

        snake_state = np.zeros(shape=(3, 3), dtype=np.uint8)
        snake_state[1, 1] = 1
        snake_state[2, 2] = 1
        snake_state[1, 2] = 1
        snake_state[2, 1] = 5

        self.assertTrue(np.array_equal(observation[:, :, 0], food_state))
        self.assertTrue(np.array_equal(observation[:, :, 1], snake_state))
    finally:
        env.close()
def test_spawning(self):
    '''
    Test that snakes and food are correct when deterministically spawned
    (for testing)
    '''
    snake_location = [(4, 4)]
    food_location = [(5, 5)]
    env = BattlesnakeGym(map_size=(9, 9),
                         number_of_snakes=1,
                         snake_spawn_locations=snake_location,
                         food_spawn_locations=food_location)

    # Check that the snake is spawned correctly
    spawned_body = env.snakes.snakes[0].locations
    self.assertTrue(np.array_equal(spawned_body, snake_location))

    # Check that food is spawned correctly
    food_row, food_col = food_location[0][0], food_location[0][1]
    self.assertTrue(env.food.locations_map[food_row, food_col] == 1)
    env.close()
def test_snake_move(self):
    '''
    Test that the snake moves correctly.

    Spawn a snake in a certain location and move it one space in each
    direction: UP, LEFT, DOWN, RIGHT.
    '''
    env = BattlesnakeGym(map_size=(9, 10),
                         number_of_snakes=1,
                         snake_spawn_locations=[(4, 4)],
                         food_spawn_locations=[(5, 5)])

    # (move, expected body locations right after that move)
    scripted_steps = [
        (Snake.UP, [(4, 4), (3, 4)]),
        (Snake.LEFT, [(4, 4), (3, 4), (3, 3)]),
        (Snake.DOWN, [(3, 4), (3, 3), (4, 3)]),
        (Snake.RIGHT, [(3, 3), (4, 3), (4, 4)]),
    ]
    for move, expected_body in scripted_steps:
        simulate_snake(env, [[move]], render=should_render())
        actual_body = env.snakes.get_snakes()[0].locations
        self.assertTrue(np.array_equal(actual_body, expected_body))

    env.close()
def __init__(self, num_agents, map_height):
    '''
    Create the wrapped BattlesnakeGym and declare the RL spaces.

    :param num_agents: number of snakes handed to BattlesnakeGym
    :param map_height: side length of the (square) map
    '''
    observation_type = "max-bordered-51s"
    self.num_agents = num_agents
    self.observation_type = observation_type
    self.old_obs1 = {}

    self.env = BattlesnakeGym(
        observation_type=observation_type,
        number_of_snakes=num_agents,
        map_size=(map_height, map_height))

    # Side length of the observation, derived from the observation type.
    if "bordered" not in observation_type:
        # Flat without border
        self.observation_height = map_height
    elif "max-bordered" in observation_type:
        self.observation_height = self.MAX_MAP_HEIGHT
    else:
        # If only bordered with 2 rows of -1
        self.observation_height = map_height + 2

    self.action_space = self.env.action_space[0]
    observation_shape = (self.observation_height,
                         self.observation_height,
                         6)
    self.observation_space = gym.spaces.Box(low=-1.0, high=5.0,
                                            shape=observation_shape,
                                            dtype=np.float32)
def run(args):
    '''
    Load one trained agent per snake and let them play a rendered game.

    :param args: namespace providing ``map_size``, ``number_of_snakes``,
        ``model_name`` (a format string receiving the snake index) and
        ``render_mode``
    '''
    max_time_steps = 1000
    map_size = tuple(args.map_size)
    number_of_snakes = args.number_of_snakes
    state_shape = (map_size[0], map_size[1], 1 + number_of_snakes)

    env = BattlesnakeGym(map_size=map_size,
                         number_of_snakes=number_of_snakes)

    # Keep every loaded agent. Previously the list stayed empty and only the
    # last agent created was used to act for all snakes.
    agents = []
    for i in range(number_of_snakes):
        agent = Agent_mxnet(state_shape=state_shape, action_size=4, seed=0)
        agent.qnetwork_local.load_parameters(args.model_name.format(i),
                                             ctx=ctx)
        agents.append(agent)

    state = env.reset()
    for _ in range(max_time_steps):
        actions = []
        for i, agent in enumerate(agents):
            # Snake ids passed to the state sorter are 1-based.
            state_agent_i = sort_states_for_snake_id(state, i + 1)
            actions.append(agent.act(state_agent_i, eps=0))

        next_state, _, done, _ = env.step(np.array(actions))
        env.render(mode=args.render_mode)

        # Slow the loop down so the rendered game can be followed.
        time.sleep(0.2)

        state = next_state
        if done:
            break
def test_gym_performance(map_sizes, number_of_snakes):
    """
    Measure the performance of the gym in steps per seconds

    Every combination of map size and snake count is simulated once and a
    summary line is printed for it.

    :param map_sizes: iterable of 2-item map sizes
    :param number_of_snakes: iterable of snake counts
    :return: None
    :raises ValueError: when the snakes do not fit on the map
    """
    max_turns_num = 100

    for map_size in map_sizes:
        for num_snakes in number_of_snakes:
            # Create the snakes one after each others
            snake_locations = []
            i = 0
            j = 0
            for _ in range(num_snakes):
                snake_locations.append([i, j])
                if j >= map_size[1] - 2:
                    i += 1
                    j = 0
                else:
                    j += 1
                if i >= map_size[0] - 2:
                    raise ValueError(
                        "Incompatible map size and number of snakes")

            # Create the food always in the same spot
            food_location = [(map_size[0] - 1, map_size[1] - 1)
                             ] * max_turns_num * max_turns_num
            print(snake_locations)

            # Create the gym
            env = BattlesnakeGym(map_size=map_size,
                                 number_of_snakes=num_snakes,
                                 snake_spawn_locations=snake_locations,
                                 food_spawn_locations=food_location)

            # Every snake repeats the same RIGHT, DOWN, LEFT, UP cycle.
            actions = [[Snake.RIGHT] * num_snakes,
                       [Snake.DOWN] * num_snakes,
                       [Snake.LEFT] * num_snakes,
                       [Snake.UP] * num_snakes] * max_turns_num

            # perf_counter is the clock meant for measuring short durations.
            tic = time.perf_counter()
            _, _, _, info = simulate_snake(env, actions, render=False)
            elapsed = time.perf_counter() - tic
            env.close()

            turns = info['current_turn']
            # Guard against a zero reading on very coarse clocks.
            steps_per_second = turns / elapsed if elapsed > 0 else float("inf")
            print(
                "Map Size {}, Num Snake {}, Num Turns {}, Total time: {:.4f}s, Steps per seconds {:.4f}"
                .format(map_size, num_snakes, turns, elapsed,
                        steps_per_second))
class MultiAgentBattlesnake(MultiAgentEnv):
    '''
    Multi-agent wrapper around BattlesnakeGym with optional action masking.

    Observations are dicts keyed by "agent_<i>". Each value holds a "state"
    (previous and current observation concatenated on the last axis) and an
    "action_mask" with one entry per action.
    '''

    MAX_MAP_HEIGHT = 21

    def __init__(self, num_agents, map_height, heuristics, rewards=SimpleRewards()):
        '''
        :param num_agents: number of snakes handed to BattlesnakeGym
        :param map_height: side length of the (square) map
        :param heuristics: collection of heuristic names; only
            "banned_forbidden_moves" and "banned_wall_hits" are picked up
        :param rewards: rewards object forwarded to BattlesnakeGym
        '''
        observation_type = "max-bordered-51s"
        self.env = BattlesnakeGym(
            observation_type=observation_type,
            number_of_snakes=num_agents,
            map_size=(map_height, map_height),
            rewards=rewards)

        # Observations always use the maximum map size.
        self.observation_height = self.MAX_MAP_HEIGHT

        self.action_space = self.env.action_space[0]

        gym_observation_space = gym.spaces.Box(
            low=-1.0, high=5.0,
            shape=(self.observation_height, self.observation_height, 6),
            dtype=np.float32)
        self.observation_space = gym.spaces.Dict({
            "action_mask": gym.spaces.Box(0, 1, shape=(4,), dtype=np.float32),
            "state": gym_observation_space})

        self.num_agents = num_agents
        self.observation_type = observation_type
        self.old_obs1 = {}

        self.heuristics = heuristics
        if len(self.heuristics) > 0:
            self.battlesnake_heuristics = MyBattlesnakeHeuristics()
            self.heuristics_list = []
            for heuristic_name in self.heuristics:
                if heuristic_name == "banned_forbidden_moves":
                    self.heuristics_list.append(
                        self.battlesnake_heuristics.banned_forbidden_moves)
                elif heuristic_name == "banned_wall_hits":
                    self.heuristics_list.append(
                        self.battlesnake_heuristics.banned_wall_hits)
        self.rewards = rewards

    def set_effective_map_size(self, eff_map_size):
        '''
        Rebuild the environment with a new map size and reset it.
        '''
        self.__init__(self.num_agents, eff_map_size, self.heuristics, self.rewards)
        self.reset()

    def reset(self):
        '''
        Reset the wrapped gym and return the initial observation dict.
        '''
        self.mask = {}
        new_obs, _, _, _ = self.env.reset()

        obs = {}

        # add empty map placeholders for use until we've seen 2 steps
        empty_map = np.zeros((self.observation_height, self.observation_height, 3))

        new_obs = np.array(new_obs, dtype=np.float32)
        for i in range(self.num_agents):
            agent_id = "agent_{}".format(i)

            # Snake ids passed to the state sorter are 1-based.
            obs_i = sort_states_for_snake_id(new_obs, i+1)
            merged_map = np.concatenate((empty_map, obs_i), axis=-1)

            if len(self.heuristics) > 0:
                # Turn 0, every snake starts with a health of 100.
                health = {k: 100 for k in range(self.num_agents)}
                mask = self.battlesnake_heuristics.get_action_masks_from_functions(
                    obs_i, i, 0, health, self.env,
                    functions=self.heuristics_list)
            else:
                # No heuristics: every action is allowed.
                mask = np.array([1, 1, 1, 1])

            obs[agent_id] = {"state": merged_map, "action_mask": mask}
            self.mask[agent_id] = obs[agent_id]["action_mask"]
            self.old_obs1[agent_id] = obs_i

        return obs

    def step(self, action_dict):
        '''
        Apply one action per agent and return (obs, rewards, dones, infos).

        :param action_dict: mapping of agent id to action; actions are
            passed to the gym in sorted agent-id order
        '''
        actions = [value for _, value in sorted(action_dict.items())]

        o, r, d, info = self.env.step(actions)
        rewards = {}
        obs = {}
        infos = {}

        for i, key in enumerate(sorted(action_dict.keys())):
            old_obs1 = self.old_obs1[key]
            obs_i = np.array(o, dtype=np.float32)
            obs_i = sort_states_for_snake_id(obs_i, i+1)
            merged_map = np.concatenate((old_obs1, obs_i), axis=-1)

            infos[key] = info
            rewards[key] = r[i]

            # Masks are only computed for snakes that are still alive.
            if len(self.heuristics) > 0 and self.env.snakes.get_snakes()[i].is_alive():
                turn_count = info["current_turn"]+1
                health = info["snake_health"]
                mask = self.battlesnake_heuristics.get_action_masks_from_functions(
                    obs_i, i, turn_count, health, self.env,
                    functions=self.heuristics_list)
            else:
                mask = np.array([1, 1, 1, 1])

            obs[key] = {"state": merged_map, "action_mask": mask}
            self.old_obs1[key] = np.array(obs_i, dtype=np.float32)
            self.mask[key] = obs[key]["action_mask"]

        # Explicit comparison kept: only entries equal to True count as dead.
        dead_count = sum(
            1 for x in range(self.num_agents) if d[x] == True)  # noqa: E712

        # The game is over once at most one snake is left. At least one death
        # is required, otherwise a single-agent game would report done on
        # every step (num_agents - 1 == 0).
        deaths_to_finish = max(1, self.num_agents - 1)
        dones = {'__all__': dead_count >= deaths_to_finish}

        return obs, rewards, dones, infos
def run(seed, args):
    '''
    Build the environment and the agents for one seed and start training.

    :param seed: seed given to the environment and the agents
    :param args: parsed command line arguments
    :raises ValueError: when ``args.state_type`` is neither "layered" nor
        "one_versus_all"
    '''
    print("Running with seed = {}".format(seed))
    map_size = json.loads(args.map_size)

    # Initialise logging
    # An explicit model directory wins (it used to leave model_dir unbound);
    # otherwise use the Sagemaker one when present, else "params".
    model_dir = args.model_dir
    if model_dir is None:
        model_dir = os.environ.get('SM_MODEL_DIR', "params")

    # Check if the model is running in Sagemaker
    load = args.load
    if 'SM_CHANNEL_WEIGHTS' in os.environ and load is not None:
        load = os.environ['SM_CHANNEL_WEIGHTS'] + "//" + load

    # NOTE(review): run_name is not defined in this function (args.run_name
    # is what gets passed to trainer below) and writer is never used
    # afterwards -- confirm whether a module-level run_name exists.
    if args.writer:
        writer = SummaryWriter("logs/{}-seed{}".format(run_name, seed),
                               verbose=False)
    else:
        writer = None

    # Initialise the environment
    # NOTE(review): number_of_snakes is not passed to the gym here -- confirm
    # its default matches args.number_of_snakes.
    env = BattlesnakeGym(map_size=map_size,
                         observation_type=args.snake_representation)
    env.seed(seed)

    # Initialise agent
    if args.state_type == "layered":
        state_depth = 1 + args.number_of_snakes
    elif args.state_type == "one_versus_all":
        state_depth = 3
    else:
        raise ValueError(
            "Unsupported state_type: {}".format(args.state_type))

    if "bordered" in args.snake_representation:
        # One extra cell on each side of both axes.
        state_shape = (map_size[0] + 2, map_size[1] + 2, state_depth)
    else:
        state_shape = (map_size[0], map_size[1], state_depth)

    agent_params = (
        seed, model_dir, load, args.load_only_conv_layers,
        args.models_to_save,
        # State configurations
        args.state_type, state_shape, args.number_of_snakes,
        # Learning configurations
        args.buffer_size, args.update_every,
        args.lr_start, args.lr_step, args.lr_factor,
        args.gamma, args.tau, args.batch_size,
        # Network configurations
        args.qnetwork_type, args.sequence_length,
        args.starting_channels, args.number_of_conv_layers,
        args.number_of_dense_layers, args.number_of_hidden_states,
        args.depthS, args.depth,
        args.kernel_size, args.repeat_size,
        args.activation_type)

    agent = MultiAgentsCollection(*agent_params)

    trainer(env, agent, args.number_of_snakes, args.run_name, args.episodes,
            args.max_t, args.warmup, args.eps_start, args.eps_end,
            args.eps_decay, args.print_score_steps,
            args.save_only_best_models, args.save_model_every,
            args.render_steps, args.should_render, args.writer,
            args.print_progress)
class MultiAgentBattlesnake(MultiAgentEnv):
    '''
    Multi-agent wrapper around BattlesnakeGym.

    Observations are dicts keyed by "agent_<i>"; each value is the previous
    and the current observation concatenated on the last axis.
    '''

    MAX_MAP_HEIGHT = 21

    def __init__(self, num_agents, map_height):
        '''
        :param num_agents: number of snakes handed to BattlesnakeGym
        :param map_height: side length of the (square) map
        '''
        observation_type = "max-bordered-51s"
        self.env = BattlesnakeGym(
            observation_type=observation_type,
            number_of_snakes=num_agents,
            map_size=(map_height, map_height))

        if "bordered" in observation_type:
            if "max-bordered" in observation_type:
                self.observation_height = self.MAX_MAP_HEIGHT
            else:
                # If only bordered with 2 rows of -1
                self.observation_height = map_height + 2
        else:
            # Flat without border
            self.observation_height = map_height

        self.action_space = self.env.action_space[0]
        self.observation_space = gym.spaces.Box(
            low=-1.0, high=5.0,
            shape=(self.observation_height, self.observation_height, 6),
            dtype=np.float32)
        self.num_agents = num_agents
        self.observation_type = observation_type
        self.old_obs1 = {}

    def set_effective_map_size(self, eff_map_size):
        '''
        Rebuild the environment with a new map size and reset it.
        '''
        self.__init__(self.num_agents, eff_map_size)
        self.reset()

    def reset(self):
        '''
        Reset the wrapped gym and return the initial observation dict.
        '''
        new_obs, _, _, _ = self.env.reset()

        obs = {}

        # add empty map placeholders for use until we've seen 2 steps
        empty_map = np.zeros((self.observation_height, self.observation_height, 3))

        new_obs = np.array(new_obs, dtype=np.float32)
        for i in range(self.num_agents):
            agent_id = "agent_{}".format(i)

            # Snake ids passed to the state sorter are 1-based.
            obs_i = sort_states_for_snake_id(new_obs, i+1)

            # The single- and multi-agent cases stored the same value, so
            # the duplicated branches were merged.
            obs[agent_id] = np.concatenate((empty_map, obs_i), axis=-1)
            self.old_obs1[agent_id] = obs_i

        return obs

    def render(self):
        '''
        Render the wrapped gym.
        '''
        self.env.render()

    def step(self, action_dict):
        '''
        Apply one action per agent and return (obs, rewards, dones, infos).

        :param action_dict: mapping of agent id to action; actions are
            passed to the gym in sorted agent-id order
        '''
        actions = [value for _, value in sorted(action_dict.items())]

        o, r, d, info = self.env.step(actions)
        rewards = {}
        obs = {}
        infos = {}

        for i, key in enumerate(sorted(action_dict.keys())):
            old_obs1 = self.old_obs1[key]
            o_i = np.array(o, dtype=np.float32)
            o_i = sort_states_for_snake_id(o_i, i+1)

            infos[key] = info
            # With a single agent the reward object is forwarded as is.
            rewards[key] = r[i] if self.num_agents > 1 else r
            obs[key] = np.concatenate((old_obs1, o_i), axis=-1)

            self.old_obs1[key] = np.array(o_i, dtype=np.float32)

        # Explicit comparison kept: only entries equal to True count as dead.
        dead_count = sum(
            1 for x in range(self.num_agents) if d[x] == True)  # noqa: E712

        # The game is over once at most one snake is left. At least one death
        # is required, otherwise a single-agent game would report done on
        # every step (num_agents - 1 == 0).
        deaths_to_finish = max(1, self.num_agents - 1)
        dones = {'__all__': dead_count >= deaths_to_finish}

        return obs, rewards, dones, infos