def select_move(self, game_state):
    """Sample a move from the model's policy output, skipping bad candidates.

    The predicted probabilities are sharpened, clipped and re-normalised,
    then every board point is drawn without replacement to obtain a random
    ranking. The first ranked point that is a valid move and does not fill
    one of the mover's own eyes is played; if there is none, the agent
    passes.
    """
    encoder = self._encoder
    board_points = encoder.board_width * encoder.board_height

    # predict() output is indexed at 0 and converted with asnumpy();
    # presumably a batch of one MXNet NDArray -- confirm against predict().
    policy = self.predict(game_state)[0].asnumpy()

    # Cubing widens the gap between likely and unlikely moves.
    policy = policy ** 3

    # Keep every probability strictly inside (0, 1) so no point is
    # impossible to draw and none is certain.
    epsilon = 1e-6
    policy = np.clip(policy, epsilon, 1 - epsilon)

    # Clipping breaks normalisation; make it a distribution again.
    policy = policy / np.sum(policy)

    # Drawing all indices without replacement yields a full random ranking
    # weighted by the policy.
    ranking = np.random.choice(
        np.arange(board_points), board_points, replace=False, p=policy)

    for index in ranking:
        point = encoder.decode_point_index(index)
        move = Move.play(point)
        if not game_state.is_valid_move(move):
            continue
        if is_point_an_eye(game_state.board, point, game_state.next_player):
            # Filling our own eye would reduce eye-space.
            continue
        return move

    # No legal, non-self-destructive move is left.
    return Move.pass_turn()
def select_move(self, game_state):
    """Play a uniformly random valid move that keeps our own eyes intact.

    Every point of the board is examined in row-major order (rows and
    columns are 1-based). Points that are valid moves and are not eyes of
    the player to move are collected; one of them is chosen at random. The
    agent passes when no such point exists.
    """
    board = game_state.board
    playable = [
        pt
        for pt in (
            Point(row=row, col=col)
            for row in range(1, board.num_rows + 1)
            for col in range(1, board.num_cols + 1)
        )
        if game_state.is_valid_move(Move.play(pt))
        and not is_point_an_eye(board, pt, game_state.next_player)
    ]
    if playable:
        return Move.play(random.choice(playable))
    return Move.pass_turn()
def select_move(self, game_state):
    """Play a random valid move that keeps our own eyes intact.

    Uses the cached list of board points (``self.point_cache``), which is
    rebuilt through ``_update_cache`` whenever the board dimensions differ
    from ``self.dim``. The cache indices are shuffled and the first point
    that is a valid, non-eye-filling move is played; otherwise the agent
    passes.
    """
    board = game_state.board
    board_dim = (board.num_rows, board.num_cols)
    if board_dim != self.dim:
        self._update_cache(board_dim)

    # Shuffle indices rather than the cache itself so the cache keeps its
    # order between calls.
    order = np.arange(len(self.point_cache))
    np.random.shuffle(order)

    for cache_index in order:
        point = self.point_cache[cache_index]
        if not game_state.is_valid_move(Move.play(point)):
            continue
        if is_point_an_eye(game_state.board, point, game_state.next_player):
            continue
        return Move.play(point)
    return Move.pass_turn()
def main():
    """Play an interactive 19x19 game: human as black against the BetaGo agent.

    The agent is created from ``./checkpoints/betago.params`` on an MXNet
    GPU context. Before each move the terminal is cleared and the board is
    printed; black's move is read from standard input, white's move comes
    from the agent. When the game is over the final board and the result
    are printed.
    """
    game = GameState.new_game(19)
    checkpoint_p = Path('./checkpoints/').resolve() / 'betago.params'
    ctx = mx.gpu()
    agent = BetaGoAgent.create(checkpoint_p, ctx)

    # ANSI escape sequence that clears the screen.
    clear_screen = chr(27) + "[2J"

    while not game.is_over():
        print(clear_screen)
        print_board(game.board)
        if game.next_player == Player.black:
            # Human turn: read board coordinates from the prompt.
            typed = input('-- ')
            move = Move.play(point_from_coords(typed.strip()))
        else:
            move = agent.select_move(game)
        game = game.apply_move(move)

    print_board(game.board)
    winner = game.winner()
    print("It's a draw." if winner is None else 'Winner: ' + str(winner))
def select_move(bot_name):
    """Answer a move request for the bot registered under ``bot_name``.

    The JSON request body supplies ``board_size`` and ``moves``. The game
    is replayed from an empty board using those moves (each one is
    ``'pass'``, ``'resign'`` or a coordinate string), the selected bot is
    asked for its move, and the reply is returned as JSON with the keys
    ``bot_move`` and ``diagnostics``.
    """

    def decode(token):
        # Translate one move of the request into a Move.
        if token == 'pass':
            return Move.pass_turn()
        if token == 'resign':
            return Move.resign()
        return Move.play(point_from_coords(token))

    def encode(move):
        # Translate the bot's Move back into the wire format.
        if move.is_pass:
            return 'pass'
        if move.is_resign:
            return 'resign'
        return coords_from_point(move.point)

    payload = request.json
    game_state = GameState.new_game(payload['board_size'])

    # Replay the game up to this point.
    for token in payload['moves']:
        game_state = game_state.apply_move(decode(token))

    bot_agent = bot_map[bot_name]
    reply = encode(bot_agent.select_move(game_state))
    return jsonify({
        'bot_move': reply,
        'diagnostics': bot_agent.diagnostics()
    })
def _encode_and_persist(self, sgf_p):
    """Encode one SGF game into feature/label arrays and save them compressed.

    The game is replayed from its handicap position. For every stone
    placement after the first move, the position *before* the move is
    encoded as a feature and the played point as its label. Passes are
    applied to the game but produce no sample.

    Nothing is written, and a message is printed, when the record contains
    a move that is invalid in the replayed position or when it yields no
    samples at all.

    Args:
        sgf_p: path object of the SGF file; its ``read_text()``, ``name``
            and ``stem`` are used.

    Returns:
        None. On success a compressed NumPy archive named
        ``<encoder name>-<sgf stem>-<sample count>`` is written below
        ``self.processed_p`` with the arrays ``d`` (features) and ``l``
        (labels).
    """
    sgf = SGFGame.from_string(sgf_p.read_text())

    # Start from the position after all handicap stones have been applied.
    game_state, first_move_done = self._get_handicap(sgf)

    label = []
    data = []
    for item in sgf.main_sequence_iter():
        color, move_tuple = item.get_move()
        if color is None:
            # This node carries no move.
            continue

        point = None
        if move_tuple is not None:
            # SGF coordinates are 0-based, board points are 1-based.
            row, col = move_tuple
            point = Point(row + 1, col + 1)
            move = Move.play(point)
            # A single invalid move discards the whole record.
            if not game_state.is_valid_move(move):
                print('invalid move: %s' % sgf_p.name)
                return
        else:
            move = Move.pass_turn()

        if first_move_done and point is not None:
            # Feature: the state before the move; label: the move played.
            data.append(self.encoder.encode(game_state))
            label.append(self.encoder.encode_point(point))

        game_state = game_state.apply_move(move)
        first_move_done = True

    size = len(data)
    if size == 0:
        print('empty: %s' % sgf_p.name)
        return
    assert len(label) == size, 'label with invalid size'

    npz_p = self.processed_p.joinpath(
        '%s-%s-%d' % (self.encoder.name(), sgf_p.stem, size))
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy (1.24); the builtin gives the same dtype.
    label = np.array(label, dtype=int)
    data = np.array(data, dtype=int)
    np.savez_compressed(str(npz_p), d=data, l=label)
def _get_handicap(self, sgf):
    """Build the starting game state of ``sgf``, including handicap stones.

    Returns a tuple ``(game_state, first_move_done)``. Without a handicap
    entry this is a fresh 19x19 game and ``False``. With a handicap entry
    every setup stone of the root node is placed as black on a 19x19 board
    and the flag is ``True``; if at least one stone was placed, the state
    has white to move with the last placed stone recorded as the previous
    move.
    """
    handicap_board = Board(19, 19)
    start_state = GameState.new_game(19)
    if sgf.get_handicap() is None:
        return start_state, False

    last_point = None
    for stones in sgf.get_root().get_setup_stones():
        for row, col in stones:
            # SGF coordinates are 0-based, board points are 1-based.
            last_point = Point(row + 1, col + 1)
            handicap_board.place_stone(Player.black, last_point)

    if last_point is not None:
        start_state = GameState(
            handicap_board, Player.white, None, Move.play(last_point))
    return start_state, True
def main():
    """Play an interactive 5x5 game: human as black against an MCTS bot.

    The bot runs 500 rounds per move with temperature 1.4. Before each move
    the terminal is cleared and the board is printed; black's move is read
    from standard input, white's move comes from the bot. When the game is
    over the final board and the result are printed.
    """
    board_size = 5
    game = GameState.new_game(board_size)
    bot = MCTSAgent(num_rounds=500, temperature=1.4)

    # ANSI escape sequence that clears the screen.
    clear_screen = chr(27) + "[2J"

    while not game.is_over():
        print(clear_screen)
        print_board(game.board)
        if game.next_player == Player.black:
            # Human turn: read board coordinates from the prompt.
            typed = input('-- ')
            move = Move.play(point_from_coords(typed.strip()))
        else:
            move = bot.select_move(game)
        game = game.apply_move(move)

    print_board(game.board)
    winner = game.winner()
    print("It's a draw." if winner is None else 'Winner: ' + str(winner))
def encode(self, game_state):
    """Encode a game state as a stack of binary feature planes.

    Plane layout, as written by this method:

    * 0-2: stones of the player to move with 1, 2 and 3-or-more liberties
    * 3-5: stones of the opponent with 1, 2 and 3-or-more liberties
    * 6:   empty points where the player to move would violate ko

    Returns an integer array of shape ``self.shape()``; the shape is
    assumed to provide at least seven planes of
    ``board_height`` x ``board_width``.
    """
    planes = np.zeros(self.shape(), dtype='int')
    mover = game_state.next_player
    # First plane of each colour's block of three liberty planes.
    first_plane = {
        mover: 0,
        mover.other: 3
    }
    for r in range(self.board_height):
        for c in range(self.board_width):
            # Board points are 1-based, tensor indices 0-based.
            point = Point(row=r + 1, col=c + 1)
            string = game_state.board.get_string(point)
            if string is not None:
                # Liberty counts above 3 share the "3 or more" plane.
                capped = min(3, string.num_liberties)
                planes[capped - 1 + first_plane[string.color], r, c] = 1
            elif game_state.does_move_violate_ko(mover, Move.play(point)):
                planes[6, r, c] = 1
    return planes