class Engine():
    """Thin wrapper that drives an ``Ants`` game one agent ant at a time.

    The agent submits one (location, direction) pair per call to
    ``do_move``; the moves are buffered in ``agent_moves`` and only sent to
    the game once every ant owned by ``RL_AGENT`` has been given an order.
    """

    def __init__(self, argv):
        """Build the game from command-line style arguments and start it.

        :param argv: argument list handed to ``main`` for option parsing
        """
        opts, args = main(argv)
        self.game = Ants(get_game_opts(opts, args))
        # Orders collected so far for the current turn: {(row, col): direction}
        self.agent_moves = {}
        self.game.start_game()
        # TODO: might move
        self.screen_shape = (self.game.height, self.game.width)

    def reset(self):
        """Restart the underlying game and drop any buffered orders."""
        self.game.restart()
        self.agent_moves = {}
        self.game.start_game()

    def do_move(self, loc, direction):
        """
        Execute the agent's action. Since an action is the choice of one ant
        and one direction, the intermediary states (some ants already
        ordered, some not) are tracked here until the turn is complete.

        :param loc: (row, col)
        :param direction: n,e,s,w
        :return: reward; -1 when ``loc`` does not hold an agent ant or that
                 ant was already ordered this turn, 0 otherwise
        """
        if self.game.map[loc[0]][loc[1]] != RL_AGENT or loc in self.agent_moves:
            return -1

        # The first order of a turn opens the turn in the game engine.
        if not self.agent_moves:
            self.game.start_turn()
        self.agent_moves[loc] = direction

        # Once every agent ant has an order, submit them all and close the turn.
        if len(self.agent_moves) == len(self.game.player_ants(RL_AGENT)):
            orders = [
                "o %s %s %s" % (row, col, move)
                for (row, col), move in self.agent_moves.items()
            ]
            # Single-argument print() behaves identically on Python 2 and 3.
            print(self.game.do_moves(RL_AGENT, orders))
            self.do_bots_move()
            self.game.finish_turn()
            self.agent_moves = {}
        # TODO: use ignored for reward calculations?
        return 0

    def do_bots_move(self):
        """Hook for playing the other bots' moves; currently a no-op."""
        pass

    def game_over(self):
        """
        Check if our agent is still in game

        :return: True when the agent is no longer alive or the game reports
                 that it is over
        """
        return not self.game.is_alive(RL_AGENT) or self.game.game_over()

    def get_observation(self):
        """Return the game's perspective for ``RL_AGENT`` as a numpy array."""
        screen = np.array(self.game.get_perspective(RL_AGENT))
        # TODO: add temp states for already moved ants
        return screen
class AntsEnv(gym.Env):
    """
    Open AI gym environment for the Ants game.

    Args:
        - game_opts (AntsEnvOptions): Options to customize the game.
        - enemies ([Bot]): A list of enemy bots.
        - reward_func (RewardFunc): A RewardFunc object to calculate the
            reward at each step.
        - agent_names ([str]): A list of names for each agent.

    Properties:
        - observation_space: Box of shape
            (n, 5 + num_players, game_height, game_width) of type np.uint8
            where n is the number of agents. Most cell values are 0 or 1 to
            represent False or True respectively. The channels are:
            - Channel 0: If the cell is visible to the agent.
            - Channel 1: If the cell is land (else water).
            - Channel 2: If the cell has food.
            - Channel 3: If the cell has an ant hill, then the player num
                (starting from 1) of the hill owner. Else 0.
            - Channel 4: If the cell has the agent's ant.
            - Channel 5: If the cell has an ant which died the previous
                turn, then the player num (starting from 1) of the dead ant.
                Else 0.
            - Channel 6 and greater: If the cell has an enemy ant (one
                channel per enemy player).
        - action_space: Box of shape (n, game_height, game_width) with
            0 <= cell value <= 4. 'n' is the number of agents. The cell
            value represents the move to make with the ant at each cell
            position for each agent:
            - 0: Don't move (also used for cells without a player ant).
            - 1: Move 1 step north.
            - 2: Move 1 step east.
            - 3: Move 1 step south.
            - 4: Move 1 step west.
    """

    # Observation channels
    CHANNEL_IS_VISIBLE = 0
    CHANNEL_IS_LAND = 1
    CHANNEL_HAS_FOOD = 2
    CHANNEL_ANT_HILL = 3
    CHANNEL_AGENT_ANT = 4
    CHANNEL_DEAD_ANTS = 5
    CHANNEL_ENEMY_ANT_START = 6

    # Action types
    NUM_ACTIONS = 5
    ACTION_DONT_MOVE = 0
    ACTION_MOVE_NORTH = 1
    ACTION_MOVE_EAST = 2
    ACTION_MOVE_SOUTH = 3
    ACTION_MOVE_WEST = 4

    def __init__(self, game_opts, enemies, reward_func, agent_names=None):
        """Create the game, derive the spaces from it and reset the env.

        Raises:
            ValueError: If there are at least as many enemies as players, or
                if ``agent_names`` is given with the wrong length.
        """
        self.game_opts = game_opts
        self.enemies = enemies
        self.reward_func = reward_func
        self.game = Ants(self.game_opts.as_dict())
        if len(self.enemies) >= self.game.num_players:
            raise ValueError(
                'The number of enemies should be strictly less than the number of players. Otherwise there will be no agent to play against.'
            )

        # Every player slot that is not taken by an enemy bot is an agent.
        self.num_agents = self.game.num_players - len(self.enemies)
        if not agent_names:
            self.agent_names = [f'Agent {i}' for i in range(self.num_agents)]
        elif len(agent_names) != self.num_agents:
            raise ValueError(
                f'len(agent_names) == {len(agent_names)} should be 0 or equal to the number of agents == {self.num_agents}.'
            )
        else:
            self.agent_names = agent_names

        # NOTE(review): the hill and dead-ant channels store player numbers
        # (owner + 1), which can exceed high=1 -- confirm whether the bound
        # should be raised before relying on observation_space.contains().
        self.observation_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=(self.num_agents, 5 + self.game.num_players,
                   self.game.height, self.game.width),
            dtype=np.uint8)
        self.action_space = gym.spaces.Box(
            low=0,
            high=4,
            shape=(self.num_agents, self.game.height, self.game.width),
            dtype=np.uint8)
        # Reuse the game built above instead of constructing a second one.
        self.reset(self.game)

    def step(self, action):
        """
        Run one timestep of the environment's dynamics.
        Accepts an action and returns a tuple (observation, reward, done, info).

        Args:
            - action: Array whose shape equals ``action_space.shape``. If one
                of the agents is no longer alive, but the game is still going
                on, then that player's action is ignored.

        Returns:
            observation (object): Agents' observation of the current environment.
            reward ([float]): Amount of reward returned after previous action
                for each agent.
            done (boolean): Whether the episode has ended, in which case
                further step() calls will throw an exception.
            info ([object]): The metadata with which the reward was
                calculated per agent.

        Raises:
            ValueError: If ``action`` has the wrong shape.
            RuntimeError: If the game has already finished.
        """
        if action.shape != self.action_space.shape:
            raise ValueError(
                f'Shape of action {action.shape} should be same as shape of action space {self.action_space.shape}'
            )
        if self.is_done:
            raise RuntimeError(
                "Can't make moves on a game that has finished. Reset board to continue."
            )

        reward_inputs = RewardInputs(self.num_agents).set_game(self.game)
        reward_inputs.set_old_state(np.copy(self._current_state))

        # Play moves for each bot in a random order
        self.game.start_turn()
        player_order = list(range(self.game.num_players))
        np.random.shuffle(player_order)
        for player_num in player_order:
            if not self.game.is_alive(player_num):
                continue
            # Players [0, num_agents) are agents, the rest are enemy bots.
            is_agent = 0 <= player_num < self.num_agents
            if is_agent:
                moves = self._action_to_moves(action[player_num])
            else:
                moves = self.enemies[player_num - self.num_agents].get_moves()
            valid, ignored, invalid = self.game.do_moves(player_num, moves)
            if is_agent:
                reward_inputs.set_ignored_moves(
                    player_num, self._parse_bad_moves(ignored))
                reward_inputs.set_invalid_moves(
                    player_num, self._parse_bad_moves(invalid))
        self.game.finish_turn()

        if self.game.game_over() or self.game.turn > self.game.turns:
            self.is_done = True
            self.game.finish_game()

        # Let the surviving enemy bots see the new map.
        for i, enemy in enumerate(self.enemies):
            player_num = i + self.num_agents
            if self.game.is_alive(player_num):
                enemy.update_map(self.game.get_player_state(player_num))

        obs = self._get_observations()

        # Calculate reward
        reward_inputs.set_new_state(obs)
        reward, info = self.reward_func(reward_inputs)
        return obs, reward, self.is_done, info

    def reset(self, init_game=None):
        """Start a fresh game and return the initial observation.

        Args:
            - init_game: Game instance to use. Default: None, in which case
                a new ``Ants`` game is built from ``game_opts``.
        """
        self.reward_func.reset()
        self.game = init_game or Ants(self.game_opts.as_dict())
        self.is_done = False
        self.game.start_game()
        for i, enemy in enumerate(self.enemies):
            player_num = i + self.num_agents
            enemy.reset()
            enemy.setup(self.game.get_player_start(player_num))
            enemy.update_map(self.game.get_player_state(player_num))
        return self._get_observations(reset=True)

    def visualize(self, game_result=None):
        """
        Visualize a game till the current state. Opens the game in a new
        browser, and will start visualization from the start, so is not the
        same as render.

        Args:
            - game_result: The game result to visualize. Default: None.
                Uses current game_result if None.
        """
        game_result = game_result or self.get_game_result()
        visualizer.launch(
            game_result_json=json.dumps(game_result, sort_keys=True))

    def get_game_result(self):
        """
        Get the result of the game.
        """
        return {
            'challenge': self.game.__class__.__name__.lower(),
            'score': self.game.get_scores(),
            'replayformat': 'json',
            'replaydata': self.game.get_replay(),
            'playernames': list(self.agent_names) +
                           [enemy.name for enemy in self.enemies],
        }

    def _get_observations(self, reset=False):
        """Refresh the cached state for every living agent and return a copy.

        Args:
            - reset: When True the cached state is rebuilt from scratch with
                every cell marked as land.
        """
        if reset:
            self._current_state = np.zeros(self.observation_space.shape,
                                           dtype=self.observation_space.dtype)
            # Set all cells to land.
            self._current_state[:, AntsEnv.CHANNEL_IS_LAND] = 1
        for i in range(self.num_agents):
            if not self.game.is_alive(i):
                continue
            self._update_observation(i, self.game.get_player_state(i))
        return np.copy(self._current_state)

    def _update_observation(self, agent_num, player_state_str):
        """Apply one player-state message to the cached observation.

        Each non-blank line is ``<key> <row> <col> [<owner>]`` where key is
        'w' (water), 'f' (food), 'h' (hill), 'a' (ant) or 'd' (dead ant).
        The land channel is never cleared, so water stays known once seen.

        Returns:
            The updated observation view for ``agent_num``.
        """
        obs = self._current_state[agent_num]

        # clear all transient entities.
        obs[AntsEnv.CHANNEL_IS_VISIBLE] = 0
        obs[AntsEnv.CHANNEL_AGENT_ANT] = 0
        obs[AntsEnv.CHANNEL_DEAD_ANTS] = 0
        obs[AntsEnv.CHANNEL_ENEMY_ANT_START:] = 0
        obs[AntsEnv.CHANNEL_ANT_HILL] = 0
        obs[AntsEnv.CHANNEL_HAS_FOOD] = 0

        # update map
        for line in player_state_str.strip().split('\n'):
            tokens = line.strip().lower().split()
            if not tokens:
                # An empty state string yields one blank line; nothing to do.
                continue
            key = tokens[0]
            row, col = int(tokens[1]), int(tokens[2])
            owner = int(tokens[3]) if len(tokens) > 3 else None

            obs[AntsEnv.CHANNEL_IS_VISIBLE, row, col] = 1
            if key == 'w':
                obs[AntsEnv.CHANNEL_IS_LAND, row, col] = 0
            elif key == 'f':
                obs[AntsEnv.CHANNEL_HAS_FOOD, row, col] = 1
            elif owner is None:
                # Owned entities without an owner token cannot be placed.
                continue
            else:
                # Player numbers are 1-based so that 0 can mean "nothing".
                player_num = owner + 1
                if key == 'h':
                    obs[AntsEnv.CHANNEL_ANT_HILL, row, col] = player_num
                elif key == 'd':
                    obs[AntsEnv.CHANNEL_DEAD_ANTS, row, col] = player_num
                elif key == 'a':
                    if 0 <= owner < self.num_agents:
                        obs[AntsEnv.CHANNEL_AGENT_ANT, row, col] = 1
                        self._update_visibility(obs, row, col)
                    else:
                        channel = (AntsEnv.CHANNEL_ENEMY_ANT_START + owner -
                                   self.num_agents)
                        obs[channel, row, col] = 1
        return obs

    def _action_to_moves(self, action):
        """Convert one agent's action grid into a list of order strings.

        Args:
            - action: 2D array of action codes, one per map cell.

        Returns:
            Orders of the form ``'o <row> <col> <n|e|s|w>'`` in row-major
            order; cells holding ACTION_DONT_MOVE produce no order.

        Raises:
            ValueError: If a cell holds a value that is not a known action.
        """
        directions = {
            AntsEnv.ACTION_MOVE_NORTH: 'n',
            AntsEnv.ACTION_MOVE_EAST: 'e',
            AntsEnv.ACTION_MOVE_SOUTH: 's',
            AntsEnv.ACTION_MOVE_WEST: 'w',
        }
        moves = []
        # argwhere walks the non-zero cells in row-major order, so only cells
        # that carry an order are visited from Python.
        for row, col in np.argwhere(action != AntsEnv.ACTION_DONT_MOVE):
            row, col = int(row), int(col)
            direction = directions.get(action[row, col].item())
            if direction is None:
                raise ValueError(
                    f'action[{row}, {col}] = {action[row, col]} is not a valid move.'
                )
            moves.append(f'o {row} {col} {direction}')
        return moves

    def _update_visibility(self, obs, row, col):
        """
        Updates the IS_VISIBLE channel given the location of an ant.

        Every cell whose squared distance to (row, col) is at most
        ``game_opts.view_radius_sq`` is marked visible; coordinates wrap
        around the map edges.
        """
        h, w = obs.shape[1:]
        view_radius_sq = self.game_opts.view_radius_sq
        view_radius = int(view_radius_sq**0.5)
        for off_row in range(-view_radius, view_radius + 1):
            for off_col in range(-view_radius, view_radius + 1):
                if off_row**2 + off_col**2 <= view_radius_sq:
                    obs[AntsEnv.CHANNEL_IS_VISIBLE,
                        (row + off_row) % h,
                        (col + off_col) % w] = 1

    def _parse_bad_moves(self, lines):
        """Turn rejected order lines back into an action-shaped grid.

        Args:
            - lines: Order strings whose tokens 1-3 are row, col, direction.

        Returns:
            Array shaped like one agent's action with the action code of
            each rejected order at its cell and 0 elsewhere.

        Raises:
            ValueError: If a line carries an unknown direction.
        """
        codes = {
            'n': AntsEnv.ACTION_MOVE_NORTH,
            'e': AntsEnv.ACTION_MOVE_EAST,
            's': AntsEnv.ACTION_MOVE_SOUTH,
            'w': AntsEnv.ACTION_MOVE_WEST,
        }
        store = np.zeros(self.action_space.shape[1:],
                         dtype=self.action_space.dtype)
        for line in lines:
            tokens = line.strip().split()
            row, col = int(tokens[1]), int(tokens[2])
            if tokens[3] not in codes:
                raise ValueError(f'Unexpected entry {line} in moves.')
            store[row, col] = codes[tokens[3]]
        return store
class TcpGame(object):
    """One Ants game played by remote bots connected over TCP.

    The owner calls ``step`` repeatedly; a turn is executed once every
    active player has sent input or the turn time has elapsed. When the
    game ends the replay is written to disk, the shared game database is
    updated and the players' skills are adjusted.
    """

    def __init__(self, db, opts, map_name, nplayers):
        """Create the game.

        :param db: shared database object exposing ``latest``, ``games``
                   and ``players``
        :param opts: option mapping passed to ``Ants``; ``turntime`` and
                     ``db_max_games`` are also read here
        :param map_name: name of the map, stored with the game record
        :param nplayers: number of players the game is meant for
        """
        self.db = db
        self.id = db.latest
        self.opts = opts
        self.players = []
        self.bot_status = []
        self.ants = Ants(opts)
        self.map_name = map_name
        # Wall-clock time at which the last turn was executed.
        self.timestep = 0
        self.nplayers = nplayers

    def step(self):
        """Run a turn if all active players are ready or the turn timed out.

        :return: the result of ``do_turn`` when a turn was run (False once
                 the game has finished), True otherwise
        """
        now = time()
        # opts['turntime'] is scaled by 0.001 before comparing with seconds.
        timed_out = (now - self.timestep > self.opts['turntime'] * 0.001)
        active = 0
        ready = 0
        for i, p in enumerate(self.players):
            if not self.ants.is_alive(i) or not p.sock:
                continue
            active += 1
            if p.poll():
                ready += 1
        # Compare against the players that can still answer; comparing
        # against all players would force a full timeout on every turn once
        # anybody is eliminated or disconnected.
        if ready == active or timed_out:
            self.timestep = time()
            return self.do_turn()
        return True

    def turn(self):
        """Return the log prefix identifying this game and its turn."""
        return "game " + str(self.id) + " turn " + str(self.ants.turn) + " : "

    def do_turn(self):
        """Collect the players' orders, advance the game by one turn and
        send each live player its new state.

        :return: False when the game has finished, True otherwise
        """
        self.ants.start_turn()
        for i, p in enumerate(self.players):
            s = p.poll()
            p.clear()
            if not self.ants.is_alive(i):
                # Tell the player only once that it has been eliminated.
                if self.bot_status[i] != "eliminated":
                    log.debug(self.turn() + p.name + " got eliminated !")
                    p.write("INFO: " + p.name + " got eliminated !\n")
                self.bot_status[i] = "eliminated"
                continue

            if s is None:
                log.warning(self.turn() + p.name + " timed out !")
                if self.bot_status[i] != "timed out":
                    log.debug(self.turn() + p.name + " timed out !")
                    self.bot_status[i] = "timed out"
                    p.write("INFO: " + p.name + " timed out !\n")
                continue

            self.bot_status[i] = "survived"

            try:
                valid, ignored, invalid = self.ants.do_moves(i, s.split('\r\n'))
            except Exception:
                log.error("!!!!!!!! do_moves failed " + str(self.ants.turn) + " : " + str(p.name))
                # Nothing to report for this player; without this the lists
                # below would be unbound or left over from another player.
                continue

            if ignored:
                txt = " ignored: " + str(ignored)
                p.write("INFO: " + txt + '\n')
            if invalid:
                txt = " invalid: " + str(invalid)
                p.write("INFO: " + txt + '\n')

        try:
            self.ants.finish_turn()
        except Exception:
            log.error("!!!!!!!! finish_turn failed " + str(self.ants.turn) + " : " + str(self.id))

        # finished ?
        if (self.ants.turn >= self.ants.turns) or self.ants.game_over():
            try:
                self.ants.finish_game()
                self.save_game()
            except Exception:
                log.error(" !!!!!!!!!! finish game failed " + str(self.ants.turn) + " : " + str(self.id) + " !!!!!!!!!!! ")
            return False

        # alive
        for i, p in enumerate(self.players):
            if self.ants.is_alive(i) and p.sock:
                p.write('turn ' + str(self.ants.turn) + '\n' + self.ants.get_player_state(i) + "go\n")
        return True

    def save_game(self):
        """Write the replay file, record the game in the shared database,
        send the final summary to the players and update their ranks."""
        log.info("saving game : " + str(self.id))
        scores = self.ants.get_scores()
        # Rank 0 is the best score; equal scores share a rank.
        distinct_scores = sorted(set(scores), reverse=True)
        ranks = [distinct_scores.index(x) for x in scores]
        game_result = {
            'challenge': 'ants',
            'game_id': self.id,
            'status': self.bot_status,
            'score': scores,
            'rank': ranks,
            'replayformat': 'json',
            'replaydata': self.ants.get_replay(),
            'playernames': [p.name for p in self.players],
        }
        rep_name = "games/" + str(self.id) + ".replay"
        # The context manager closes the file even if serialization fails.
        with open(rep_name, 'w') as f:
            json.dump(game_result, f)

        # add to game db data shared with the webserver
        g = GameData()
        g.id = self.id
        g.map = self.map_name
        g.date = asctime()
        plr = {}
        for i, p in enumerate(self.players):
            if p.name in self.db.players:
                player = self.db.players[p.name]
            else:
                player = PlayerData()
                player.name = p.name
                self.db.players[p.name] = player
            player.ngames += 1
            plr[p.name] = scores[i]
        g.players = plr
        self.db.games[g.id] = g

        # pop from list if there's too many games:
        if len(self.db.games) > int(self.opts['db_max_games']):
            # list() keeps this working where keys() returns a view object.
            k = list(self.db.games.keys())[0]
            del self.db.games[k]

        log.info("db : " + str(len(self.db.games)) + " games")
        log.info("db : " + str(len(self.db.players)) + " players")

        # send final game info to players:
        end_line = 'INFO: hope, you enjoyed game ' + str(self.id) + '.\n'
        end_line += 'INFO: players: '
        end_line += ''.join(' ' + p.name for p in self.players)
        end_line += '\nINFO: scores : %s\n' % ' '.join([str(s) for s in scores])
        end_line += 'end\n'
        for p in self.players:
            # Best effort: a player that cannot be written to is skipped.
            try:
                p.write(end_line)
            except Exception:
                continue

        self.calc_ranks(self.players, ranks)

    def calc_ranks(self, players, ranks):
        """Adjust the stored skill of every player from the game's ranks.

        :param players: player objects; ``name`` is used to look up the
                        matching record in ``self.db.players``
        :param ranks: rank of each player, in the same order as ``players``
        """
        class TrueSkillPlayer(object):
            """Record handed to ``trueskill.AdjustPlayers``."""

            def __init__(self, name, skill, rank):
                self.name = name
                self.old_skill = skill
                self.skill = skill
                self.rank = rank

        ts_players = []
        for i, p in enumerate(players):
            pdata = self.db.players[p.name]
            ts_players.append(
                TrueSkillPlayer(i, (pdata.mu, pdata.sigma), ranks[i]))

        trueskill.AdjustPlayers(ts_players)

        # Copy the adjusted (mu, sigma) pairs back into the database records.
        for i, p in enumerate(players):
            pdata = self.db.players[p.name]
            pdata.mu = ts_players[i].skill[0]
            pdata.sigma = ts_players[i].skill[1]
            pdata.skill = pdata.mu - pdata.sigma * 3