def decode_move_index(self, index):
    """Translate a flat move index back into a ``Move``.

    The index equal to ``self._pass_idx`` decodes to a pass. Any other index
    is split into a zero-based row and column using ``self._board_size`` as
    the row length, and is returned as a play on the matching one-based
    ``Point``.
    """
    if index != self._pass_idx:
        zero_row, zero_col = divmod(index, self._board_size)
        return Move.play(Point(row=zero_row + 1, col=zero_col + 1))
    return Move.pass_turn()
def remove_dead_stones_and_score(game_state):
    """Score a finished game after stripping dead stones from the board.

    If the last move was a resignation, a ``GameResult`` is returned right
    away: the player who is *not* next to move is flagged as having resigned,
    both numeric fields are 0.0, and the current komi and board are passed
    through unchanged.

    Otherwise the trailing passes are rewound, ``remove_dead_stones`` is run
    on the resulting state, and a fresh state built from the cleaned board
    (same next player, same komi) is ended with two passes and handed to
    ``compute_game_result``.
    """
    if game_state.last_move.is_resign:
        # The resigning side is the one that just moved, i.e. not next_player.
        white_resigned = game_state.next_player == Player.black
        black_resigned = not white_resigned
        return GameResult(
            black_resigned, white_resigned,
            0.0, 0.0,
            game_state.komi(), game_state.board)

    # Walk back past every pass at the end of the game record.
    before_passes = game_state
    while before_passes.last_move == Move.pass_turn():
        before_passes = before_passes.previous_state

    cleaned_board = remove_dead_stones(before_passes)
    scoring_state = GameState.from_board(
        cleaned_board, game_state.next_player, game_state.komi())
    # Two consecutive passes are applied before computing the result.
    for _ in range(2):
        scoring_state = scoring_state.apply_move(Move.pass_turn())
    return compute_game_result(scoring_state)
def decode_sgf_move(sgf_move, num_rows):
    """Convert a two-letter SGF coordinate into a ``Move``.

    An empty string decodes to a pass. Otherwise the first letter selects the
    column (its position in ``ALPHABET``, plus one) and the second letter
    selects the row, counted downward from ``num_rows``.
    """
    if sgf_move == '':
        return Move.pass_turn()
    assert len(sgf_move) == 2
    col_letter = sgf_move[0]
    col = 1 + ALPHABET.index(col_letter)
    row_letter = sgf_move[1]
    row = num_rows - ALPHABET.index(row_letter)
    return Move.play(Point(row, col))
def test_move_is_hashable(self):
    """Moves work as dict keys, and equal moves find the same entry."""
    play_key = Move.play(Point(1, 1))
    resign_key = Move.resign()
    table = {play_key: 1, resign_key: 2}
    # Look up with freshly built, equal moves rather than the original keys.
    self.assertEqual(1, table[Move.play(Point(1, 1))])
    self.assertEqual(2, table[Move.resign()])
def test_create_game_from_board(self):
    """A game built from a populated board keeps its stones and next player."""
    board = Board(5, 5)
    for occupied in (Point(2, 2), Point(4, 4)):
        board.place_stone(Player.black, occupied)

    game = GameState.from_board(board, Player.white)

    self.assertEqual(Player.white, game.next_player)
    on_stone = Move.play(Point(2, 2))
    on_empty = Move.play(Point(3, 3))
    self.assertFalse(game.is_valid_move(on_stone))
    self.assertTrue(game.is_valid_move(on_empty))
def read_move(self):
    """Read one move using ``self.read_bool`` and ``self.read_int``.

    Three booleans (play, pass, resign) are always read first, in that order.
    A row and then a column integer are read only when the play flag is set.
    """
    # The comprehension performs the three reads in sequence.
    is_play, is_pass, is_resign = [self.read_bool() for _ in range(3)]
    if is_play:
        row = self.read_int()
        col = self.read_int()
        return Move.play(Point(row=row, col=col))
    if is_pass:
        return Move.pass_turn()
    assert is_resign
    return Move.resign()
def test_new_game(self):
    """The first move links back to the start, is black, and hands over to white."""
    opening = GameState.new_game(19)
    first_move = Move.play(Point(16, 16))
    after_first = opening.apply_move(first_move)

    self.assertEqual(opening, after_first.previous_state)
    self.assertEqual(Player.white, after_first.next_player)
    self.assertEqual(Player.black, after_first.board.get(Point(16, 16)))
def test_ko_as_array(self):
    """The ko array marks only the point that may not be retaken."""
    game = GameState.new_game(19)
    # Position after the first seven moves:
    # .wb.
    # wb.b
    # .wb.
    # ....
    sequence = [
        (1, 3), (1, 2),
        (2, 2), (2, 1),
        (3, 3), (3, 2),
        (2, 4),
        (2, 3),  # W takes the ko
    ]
    for row, col in sequence:
        game = game.apply_move(Move.play(Point(row, col)))

    ko_array = game.ko_points_as_array()
    self.assertEqual((19, 19), ko_array.shape)
    expectations = (
        (1, (1, 1)),
        (0, (2, 1)),
        (0, (5, 5)),
    )
    for expected, (r, c) in expectations:
        self.assertEqual(expected, ko_array[r, c])
def test_legal_move_mask(self):
    """The legal-move mask is 0 for suicide, occupied and ko points, else 1."""
    game = GameState.new_game(19)
    # Position after the first seven moves:
    # .wb.
    # wb.b
    # .wb.
    # ....
    sequence = [
        (1, 3), (1, 2),
        (2, 2), (2, 1),
        (3, 3), (3, 2),
        (2, 4),
        (2, 3),  # W takes the ko
    ]
    for row, col in sequence:
        game = game.apply_move(Move.play(Point(row, col)))

    legal_moves = game.legal_moves_as_array()
    self.assertEqual((19 * 19 + 1, ), legal_moves.shape)

    illegal_indices = {
        # Suicide
        19 * 0 + 0,
        # Stones here
        19 * 0 + 2,
        19 * 0 + 1,
        19 * 1 + 0,
        19 * 2 + 2,
        19 * 2 + 1,
        19 * 1 + 3,
        19 * 1 + 2,
        # ko
        19 * 1 + 1,
    }
    for i, val in enumerate(legal_moves):
        if i not in illegal_indices:
            self.assertEqual(1, val, "{} should be legal".format(i))
            continue
        self.assertEqual(0, val, "{} should be illegal".format(i))
def encode(self, game_state):
    """Encode ``game_state`` as a stack of feature planes.

    Plane layout written by this method:

    * 0-3: points of non-white strings with 1, 2, 3, or 4-and-more liberties
    * 4-7: the same liberty buckets for white strings
    * 8:   filled with ones when black is next to play
    * 9:   filled with ones otherwise
    * 10:  empty points where a play would violate ko

    The returned array has shape ``self.shape()``.
    """
    tensor = np.zeros(self.shape())
    to_move_plane = 8 if game_state.next_player == Player.black else 9
    tensor[to_move_plane] = 1

    size = self._board_size
    for r in range(size):
        for c in range(size):
            point = Point(row=r + 1, col=c + 1)
            string = game_state.board.get_string(point)
            if string is not None:
                # Liberty counts are capped at 4 and mapped to planes 0..3.
                plane = min(4, string.num_liberties) - 1
                if string.color == Player.white:
                    plane += 4
                tensor[plane][r][c] = 1
            elif game_state.does_move_violate_ko(Move.play(point)):
                tensor[10][r][c] = 1
    return tensor
def test_komi(self):
    """Komi given at game creation is still reported after a move."""
    opening = GameState.new_game(19, 0.5)
    after_first = opening.apply_move(Move.play(Point(16, 16)))
    reported = after_first.komi()
    self.assertAlmostEqual(0.5, reported)
def test_ko(self):
    """A ko may not be retaken at once, but may after an exchange elsewhere."""
    game = GameState.new_game(19)
    # Position after the first seven moves:
    # .wb.
    # wb.b
    # .wb.
    # ....
    setup = (
        (1, 3), (1, 2),
        (2, 2), (2, 1),
        (3, 3), (3, 2),
        (2, 4),
    )
    for row, col in setup:
        game = game.apply_move(Move.play(Point(row, col)))
    # W takes the ko
    game = game.apply_move(Move.play(Point(2, 3)))

    # B can't take back
    self.assertTrue(game.does_move_violate_ko(Move.play(Point(2, 2))))
    self.assertFalse(game.is_valid_move(Move.play(Point(2, 2))))

    # "ko threat"
    for row, col in ((19, 19), (18, 18)):
        game = game.apply_move(Move.play(Point(row, col)))

    # B can take now
    self.assertFalse(game.does_move_violate_ko(Move.play(Point(2, 2))))
    self.assertTrue(game.is_valid_move(Move.play(Point(2, 2))))
def test_last_move(self):
    """``last_move`` equals the move that produced the state."""
    played = Move.play(Point(16, 16))
    state = GameState.new_game(19).apply_move(played)
    # Compare against a freshly built, equal move rather than ``played``.
    expected = Move.play(Point(16, 16))
    self.assertEqual(expected, state.last_move)
def test_move_number(self):
    """``num_moves`` starts at zero and counts each applied move."""
    game = GameState.new_game(19)
    self.assertEqual(0, game.num_moves)
    for row, col in ((1, 3), (1, 2)):
        game = game.apply_move(Move.play(Point(row, col)))
    self.assertEqual(2, game.num_moves)
def test_is_valid_move(self):
    """An empty point is playable; an occupied point is not."""
    state = GameState.new_game(19).apply_move(Move.play(Point(16, 16)))
    on_empty = Move.play(Point(16, 17))
    self.assertTrue(state.is_valid_move(on_empty))
    on_stone = Move.play(Point(16, 16))
    self.assertFalse(state.is_valid_move(on_stone))
def select_move(self, game_state):
    """Run a batched tree search from ``game_state`` and pick a move.

    Steps, in order:

    1. Build ``self.root``: from ``read_ladders`` when ``_ladder_rollouts``
       is positive and it returns a node, otherwise from ``create_node``
       with ``add_noise=True``.
    2. Expand leaves in batches until at least ``_num_rollouts`` distinct
       expansions have been counted.
    3. Log per-move statistics, the chosen move and the elapsed time to
       stderr.
    4. Choose a move: sampled when the scheduled temperature is positive,
       otherwise the argmax.

    Returns:
        ``Move.resign()`` when the chosen move's expected value is below
        ``_resign_below``; ``Move.pass_turn()`` when ``_gracious_winner`` is
        set, the previous move was a pass, and passing has at least two
        visits with an expected value above ``_gracious_winner``; otherwise
        the chosen move.
    """
    start = time.time()
    # Always search from a fresh root; any earlier self.root is discarded.
    self.root = None
    if self._ladder_rollouts > 0:
        self.root = self.read_ladders(game_state, self._ladder_rollouts)
    # Covers both "ladder reading disabled" and "read_ladders returned None".
    if self.root is None:
        self.root = self.create_node(game_state, add_noise=True)

    num_rollouts = 0
    while num_rollouts < self._num_rollouts:
        # A set: the same (node, move) leaf selected twice in one batch is
        # queued for expansion only once.
        to_expand = set()
        batch_count = 0
        while batch_count < self._batch_size:
            # Find a leaf.
            node = self.root
            move = self.select_branch(node)
            while node.has_child(move):
                # Virtual loss is added on every edge walked on the way down.
                node.add_virtual_loss(move)
                node = node.get_child(move)
                move = self.select_branch(node)
            node.add_virtual_loss(move)
            batch_count += 1
            to_expand.add((node, move))

        # Only distinct leaves count toward the rollout budget.
        batch_num_visits = len(to_expand)
        new_children = self.create_children(to_expand)
        for new_child in new_children:
            new_child.parent.record_visit(new_child.move, new_child.value)
        num_rollouts += batch_num_visits

    # Now select a move in proportion to how often we visited it.
    visit_counts = self.root.visit_counts
    expected_values = calc_expected_values(self.root.total_values, visit_counts)
    # The tiebreak is added to the visit counts, so the ranking below uses
    # counts plus a value-based term.
    # NOTE(review): the tiebreak stays below 1 only if expected values lie
    # in [-1, 1] -- confirm against calc_expected_values.
    tiebreak = 0.499 * (expected_values + 1)
    decide_vals = visit_counts + tiebreak
    # argsort is ascending, so the highest-ranked move is logged last.
    for move_idx in np.argsort(decide_vals):
        visit_count = visit_counts[move_idx]
        if visit_count > 0:
            sys.stderr.write('{}: {:.3f} {}\n'.format(
                format_move(self._encoder.decode_move_index(move_idx)),
                expected_values[move_idx],
                visit_count))
    temperature = self._temp_schedule.get(game_state.num_moves)
    if temperature > 0:
        # Sample among visited moves only, with probability proportional to
        # decide_vals ** (1 / temperature).
        move_indices, = np.where(visit_counts > 0)
        raw_counts = decide_vals[move_indices]
        p = np.power(raw_counts, 1.0 / temperature)
        p /= np.sum(p)
        move_index = np.random.choice(move_indices, p=p)
    else:
        # Zero (or negative) temperature: take the top-ranked move.
        move_index = np.argmax(decide_vals)

    self._log_pv(self.root)

    chosen_move = self._encoder.decode_move_index(move_index)
    sys.stderr.write('Select {} Q {:.3f}\n'.format(
        format_move(chosen_move),
        expected_values[move_index]))
    end = time.time()
    sys.stderr.write('Decided in {:.3f}s\n'.format(end - start))
    sys.stderr.flush()

    # Resignation is checked before the pass rule below.
    if expected_values[move_index] < self._resign_below:
        sys.stderr.write('Resigning because Q {:.3f} < {:.3f}\n'.format(
            expected_values[move_index], self._resign_below))
        return Move.resign()

    if self._gracious_winner is not None:
        # Only consider passing in reply to a pass.
        if game_state.last_move is not None and game_state.last_move == Move.pass_turn(
        ):
            pass_idx = self._encoder.encode_move(Move.pass_turn())
            # Pass needs at least two visits and an expected value above
            # the gracious-winner threshold.
            if visit_counts[pass_idx] >= 2 and \
                    expected_values[pass_idx] > self._gracious_winner:
                sys.stderr.write('Pass has Q {:.3f}\n'.format(
                    expected_values[pass_idx]))
                return Move.pass_turn()

    return chosen_move