def test_ko_move(self):
    # Black captures at A9, which creates a ko at B9. White may not retake
    # straight away, but may do so once each side has passed.
    size = utils_test.BOARD_SIZE
    a9 = coords.from_kgs(size, 'A9')
    b9 = coords.from_kgs(size, 'B9')

    board_before = utils_test.load_board(
        ''' .OX...... OX....... ''' + EMPTY_ROW * 7)
    opening = Position(
        size, board=board_before, n=0, komi=6.5, caps=(1, 2),
        ko=None, recent=(), to_play=BLACK)

    board_after = utils_test.load_board(
        ''' X.X...... OX....... ''' + EMPTY_ROW * 7)
    want_after_capture = Position(
        size, board=board_after, n=1, komi=6.5, caps=(2, 2),
        ko=b9, recent=(PlayerMove(BLACK, a9),), to_play=WHITE)

    after_capture = opening.play_move(a9)
    self.assertEqualPositions(after_capture, want_after_capture)

    # Retaking the ko is illegal until two moves have intervened.
    with self.assertRaises(go.IllegalMove):
        after_capture.play_move(b9)

    after_retake = after_capture.pass_move().pass_move().play_move(b9)
    want_after_retake = Position(
        size, board=board_before, n=4, komi=6.5, caps=(2, 3), ko=a9,
        recent=(
            PlayerMove(BLACK, a9),
            PlayerMove(WHITE, None),
            PlayerMove(BLACK, None),
            PlayerMove(WHITE, b9),
        ),
        to_play=BLACK)
    self.assertEqualPositions(after_retake, want_after_retake)
def test_move_with_capture(self):
    # Black B2 fills the last liberty of the six white stones, so Black's
    # capture count goes from 1 to 7 and no ko is recorded.
    b2 = pc('B2')

    board_before = load_board(
        EMPTY_ROW * 5 + ''' XXXX..... XOOX..... O.OX..... OOXX..... ''')
    opening = Position(
        board=board_before, n=0, komi=6.5, caps=(1, 2),
        ko=None, recent=(), to_play=BLACK)

    board_after = load_board(
        EMPTY_ROW * 5 + ''' XXXX..... X..X..... .X.X..... ..XX..... ''')
    wanted = Position(
        board=board_after, n=1, komi=6.5, caps=(7, 2),
        ko=None, recent=(PlayerMove(BLACK, b2),), to_play=WHITE)

    self.assertEqualPositions(opening.play_move(b2), wanted)
def test_move_with_capture(self):
    # A single black move at B2 captures six white stones at once.
    capturing_move = pc('B2')
    common = dict(komi=6.5, ko=None)

    pre_capture = Position(
        board=load_board(
            EMPTY_ROW * 5
            + ''' XXXX..... XOOX..... O.OX..... OOXX..... '''),
        n=0, caps=(1, 2), recent=(), to_play=BLACK, **common)

    post_capture = Position(
        board=load_board(
            EMPTY_ROW * 5
            + ''' XXXX..... X..X..... .X.X..... ..XX..... '''),
        n=1, caps=(7, 2),
        recent=(PlayerMove(BLACK, capturing_move),),
        to_play=WHITE, **common)

    result = pre_capture.play_move(capturing_move)
    self.assertEqualPositions(result, post_capture)
def simulate(network, board=None, steps=20):
    """Greedily roll out ``network`` and print each step.

    Starting from ``Position(board=board)``, the highest-scoring entry of
    the policy returned by ``network.run`` is played ``steps`` times. The
    chosen move and the resulting position are printed so the network's
    tactic can be followed by eye.

    Args:
        network: object whose ``run(position)`` returns ``(policy, value)``.
        board: initial board handed to ``Position``; ``None`` is passed
            through unchanged.
        steps: number of moves to play.
    """
    position = Position(board=board)
    for _ in range(steps):
        # Only the policy head is used; the value estimate is ignored.
        policy, _value = network.run(position)
        chosen = coords.from_flat(np.argmax(policy))
        print('Best move', coords.to_gtp(chosen))
        position = position.play_move(chosen)
        # NOTE(review): assumed to print once per step rather than once at
        # the end; confirm against the original indentation.
        print(position)
def test_move(self):
    # Two ordinary moves (Black C9, then White J8); capture counts and ko
    # stay untouched while the move history and side to play advance.
    size = utils_test.BOARD_SIZE
    c9 = coords.from_kgs(size, 'C9')
    j8 = coords.from_kgs(size, 'J8')

    opening = Position(
        size, board=TEST_BOARD, n=0, komi=6.5, caps=(1, 2),
        ko=None, recent=(), to_play=BLACK)

    board_after_c9 = utils_test.load_board(
        ''' .XX....OO X........ ''' + EMPTY_ROW * 7)
    want_after_c9 = Position(
        size, board=board_after_c9, n=1, komi=6.5, caps=(1, 2),
        ko=None, recent=(PlayerMove(BLACK, c9),), to_play=WHITE)
    after_c9 = opening.play_move(c9)
    self.assertEqualPositions(after_c9, want_after_c9)

    board_after_j8 = utils_test.load_board(
        ''' .XX....OO X.......O ''' + EMPTY_ROW * 7)
    want_after_j8 = Position(
        size, board=board_after_j8, n=2, komi=6.5, caps=(1, 2),
        ko=None,
        recent=(PlayerMove(BLACK, c9), PlayerMove(WHITE, j8)),
        to_play=BLACK)
    after_j8 = after_c9.play_move(j8)
    self.assertEqualPositions(after_j8, want_after_j8)
def test_legal_moves(self):
    # Every empty point is tried for Black, then (after a pass) for White;
    # points outside the expected legal set must raise go.IllegalMove.
    board = load_board(
        ''' .XXXXXXXO XX.OOOOO. OOOOOOOOO XXXXXXXX. OOOOOOOOO XXXXXXXXX XXXXXXXXX XXXXXXXXX XXXXXXXX. ''')
    black_to_move = Position(
        board=board, n=0, komi=6.5, caps=(0, 0), ko=pc('J8'),
        recent=(), to_play=BLACK)
    empty_spots = pc_set('A9 C8 J8 J6 J1')

    def check_every_empty_point(pos, colour, legal_points):
        for point in empty_spots:
            if point in legal_points:
                pos.play_move(colour, point)
            else:
                with self.assertRaises(go.IllegalMove):
                    pos.play_move(colour, point)

    check_every_empty_point(black_to_move, BLACK, pc_set('A9 C8 J6'))
    check_every_empty_point(
        black_to_move.pass_move(), WHITE, pc_set('C8 J8 J6 J1'))
def test_ko_move_mutable_board(self):
    # Same ko scenario as the immutable test, but every move is made with
    # mutate=True. Statement order matters here because positions are
    # updated in place.
    a9 = pc('A9')
    b9 = pc('B9')

    board_before = load_board(''' .OX...... OX....... ''' + EMPTY_ROW * 7)
    opening = Position(
        board=board_before, n=0, komi=6.5, caps=(1, 2),
        ko=None, recent=(), to_play=BLACK)

    board_after = load_board(''' X.X...... OX....... ''' + EMPTY_ROW * 7)
    want_after_capture = Position(
        board=board_after, n=1, komi=6.5, caps=(2, 2),
        ko=b9, recent=(PlayerMove(BLACK, a9),), to_play=WHITE)

    after_capture = opening.play_move(a9, mutate=True)
    self.assertEqualPositions(after_capture, want_after_capture)

    # Retaking the ko is illegal until two moves have intervened.
    with self.assertRaises(go.IllegalMove):
        after_capture.play_move(b9, mutate=True)

    after_one_pass = after_capture.pass_move(mutate=True)
    after_two_passes = after_one_pass.pass_move(mutate=True)
    after_retake = after_two_passes.play_move(b9, mutate=True)

    want_after_retake = Position(
        board=board_before, n=4, komi=6.5, caps=(2, 3), ko=a9,
        recent=(
            PlayerMove(BLACK, a9),
            PlayerMove(WHITE, None),
            PlayerMove(BLACK, None),
            PlayerMove(WHITE, b9),
        ),
        to_play=BLACK)
    self.assertEqualPositions(after_retake, want_after_retake)
def test_ko_move_mutable_board(self):
    # Ko capture and delayed retake, exercised through the in-place
    # (mutate=True) code path of play_move / pass_move.
    take, retake = pc('A9'), pc('B9')
    in_place = dict(mutate=True)

    initial_board = load_board(''' .OX...... OX....... ''' + EMPTY_ROW * 7)
    initial = Position(
        board=initial_board, n=0, komi=6.5, caps=(1, 2),
        ko=None, recent=(), to_play=BLACK)

    captured_board = load_board(''' X.X...... OX....... ''' + EMPTY_ROW * 7)
    expected_first = Position(
        board=captured_board, n=1, komi=6.5, caps=(2, 2),
        ko=retake, recent=(PlayerMove(BLACK, take),), to_play=WHITE)

    current = initial.play_move(take, **in_place)
    self.assertEqualPositions(current, expected_first)

    # An immediate retake of the ko must be rejected.
    with self.assertRaises(go.IllegalMove):
        current.play_move(retake, **in_place)

    # Two passes later the retake is allowed.
    passed = current.pass_move(**in_place).pass_move(**in_place)
    final = passed.play_move(retake, **in_place)

    history = (
        PlayerMove(BLACK, take),
        PlayerMove(WHITE, None),
        PlayerMove(BLACK, None),
        PlayerMove(WHITE, retake),
    )
    expected_final = Position(
        board=initial_board, n=4, komi=6.5, caps=(2, 3),
        ko=take, recent=history, to_play=BLACK)
    self.assertEqualPositions(final, expected_final)
def test_move(self):
    # Black C9 followed by White J8: no captures, no ko, history grows by
    # one PlayerMove per move.
    c9 = parse_kgs_coords('C9')
    j8 = parse_kgs_coords('J8')

    opening = Position(
        board=TEST_BOARD, n=0, komi=6.5, caps=(1, 2),
        ko=None, recent=(), to_play=BLACK)

    board_after_c9 = test_utils.load_board(
        ''' .XX....OO X........ ''' + EMPTY_ROW * 7)
    want_after_c9 = Position(
        board=board_after_c9, n=1, komi=6.5, caps=(1, 2),
        ko=None, recent=(PlayerMove(BLACK, c9),), to_play=WHITE)
    after_c9 = opening.play_move(c9)
    self.assertEqualPositions(after_c9, want_after_c9)

    board_after_j8 = test_utils.load_board(
        ''' .XX....OO X.......O ''' + EMPTY_ROW * 7)
    want_after_j8 = Position(
        board=board_after_j8, n=2, komi=6.5, caps=(1, 2),
        ko=None,
        recent=(PlayerMove(BLACK, c9), PlayerMove(WHITE, j8)),
        to_play=BLACK)
    after_j8 = after_c9.play_move(j8)
    self.assertEqualPositions(after_j8, want_after_j8)
def test_move(self):
    # Variant where play_move takes the colour explicitly and `recent`
    # stores bare coordinates.
    c9 = pc('C9')
    j8 = pc('J8')

    opening = Position(
        board=TEST_BOARD, n=0, komi=6.5, caps=(1, 2),
        ko=None, recent=(), to_play=BLACK)

    board_after_c9 = load_board(''' .XX....OO X........ ''' + EMPTY_ROW * 7)
    want_after_c9 = Position(
        board=board_after_c9, n=1, komi=6.5, caps=(1, 2),
        ko=None, recent=(c9,), to_play=WHITE)
    after_c9 = opening.play_move(BLACK, c9)
    self.assertEqualPositions(after_c9, want_after_c9)

    board_after_j8 = load_board(''' .XX....OO X.......O ''' + EMPTY_ROW * 7)
    want_after_j8 = Position(
        board=board_after_j8, n=2, komi=6.5, caps=(1, 2),
        ko=None, recent=(c9, j8), to_play=BLACK)
    after_j8 = after_c9.play_move(WHITE, j8)
    self.assertEqualPositions(after_j8, want_after_j8)
def replay_position(position):
    """Replay the move history of a ``go.Position`` step by step.

    The replay starts from a fresh ``Position`` that carries only the
    original komi, so the game must have begun on an empty board (no
    handicap) and ``position.recent`` must hold every move played.

    Yields:
        One ``PositionWithContext(pos, next_move, metadata)`` before each
        recorded move, then a final one for the fully replayed position
        with ``next_move`` set to ``None``.

    Example::

        for position_w_context in replay_position(position):
            print(position_w_context.position)
    """
    assert position.n == len(position.recent), "Position history is incomplete"

    outcome = position.result()
    side_length = position.board.shape[0]
    metadata = GameMetadata(
        result=outcome, handicap=0, board_size=side_length)
    go.set_board_size(metadata.board_size)

    current = Position(komi=position.komi)
    for color, next_move in position.recent:
        yield PositionWithContext(current, next_move, metadata)
        current = current.play_move(next_move, color=color)

    # Finally hand back the fully replayed position; its next move is unknown.
    yield PositionWithContext(current, None, metadata)
class Referee():
    """Holds the current game position and adjudicates submitted moves."""

    def __init__(self):
        self.go = Position(n=9, komi=3.25)

    def action(self, coord):
        """Apply a move to the game and report what happened.

        Args:
            coord: two-element sequence with the board coordinate of the
                move. The pair ``-1, -1`` means "pass"; it is recognised
                whether it arrives as a list, a tuple or an array.

        Returns:
            A three-item sequence ``(legal, captured, winner)``:

            * ``legal`` -- whether the move was accepted.
            * ``captured`` -- coordinates of the stones removed by the
              move (an empty list for a pass or an illegal move).
            * ``winner`` -- ``0`` while the game is undecided, otherwise
              the value of ``Position.result()`` (documented by the
              original author as 1 for a win and -1 for a loss).

            A pass is returned as a list and the other outcomes as tuples;
            this is kept as-is so existing callers see the same types.
        """
        # Normalise once so pass detection does not depend on the container
        # type. The previous `coord == [-1, -1]` matched only lists, so a
        # tuple pass fell through to the board-indexing code below.
        move = tuple(coord)

        if move == (-1, -1):
            self.go = self.go.pass_move()
            # A pass may end the game; report the result if it did.
            winner = 0
            if self.go.is_game_over():
                winner = self.go.result()
                print(self.go.result_string())
            return [True, [], winner]

        if not self.go.is_move_legal(move):
            return (False, [], 0)

        # Snapshot the board with the new stone already marked as occupied,
        # so that any point occupied here but empty after the move must be
        # a captured stone.
        preBoard = self.go.board.copy()
        preBoard[coord[0], coord[1]] = 1

        self.go = self.go.play_move(move)

        # Compare occupancy (absolute values) before and after the move.
        absDiff = np.abs(preBoard) - np.abs(self.go.board)
        takes = np.transpose(np.nonzero(absDiff))
        return (True, takes, 0)
def replay_position(position):
    """Generate every intermediate position of a finished ``go.Position``.

    Works only when the game started from an empty board (no handicap) and
    the recorded history is exhaustive; this is asserted up front.

    Typical use::

        for position_w_context in replay_position(position):
            print(position_w_context.position)

    Each yielded ``PositionWithContext`` pairs a position with the move
    that was played from it; the last one carries ``None`` as its move.
    """
    history = position.recent
    assert position.n == len(history), "Position history is incomplete"

    metadata = GameMetadata(
        result=position.result(),
        handicap=0,
        board_size=position.board.shape[0],
    )
    go.set_board_size(metadata.board_size)

    replayed = Position(komi=position.komi)
    for color, move in history:
        yield PositionWithContext(replayed, move, metadata)
        replayed = replayed.play_move(move, color=color)

    # The history is exhausted: emit the end state with no follow-up move.
    yield PositionWithContext(replayed, None, metadata)
def test_legal_moves(self):
    # For Black to move, and again for White after a pass: each empty
    # point either plays cleanly or raises go.IllegalMove, and passing
    # (play_move(None)) is always accepted.
    board = load_board(
        ''' .XXXXXXXO XX.OOOOO. OOOOOOOOO XXXXXXXX. OOOOOOOOO XXXXXXXXX XXXXXXXXX XXXXXXXXX XXXXXXXX. ''')
    black_to_move = Position(
        board=board, n=0, komi=6.5, caps=(0, 0), ko=pc('J8'),
        recent=(), to_play=BLACK)
    empty_spots = pc_set('A9 C8 J8 J6 J1')

    def verify(pos, allowed):
        for candidate in empty_spots:
            if candidate in allowed:
                pos.play_move(candidate)
                continue
            with self.assertRaises(go.IllegalMove):
                pos.play_move(candidate)
        # pass should also be legal
        pos.play_move(None)

    verify(black_to_move, pc_set('A9 C8 J6'))
    white_to_move = black_to_move.pass_move()
    verify(white_to_move, pc_set('C8 J8 J6 J1'))
def simOppLatest(self):
    """
    Simulate from the opponent's latest move: the opponent's earlier
    stones are drawn by direct random sampling and the intermediate
    positions are not recorded.

    Reads self.board_opp_known, self.basePb, self.board_selfNow,
    self.color, self.num_oppStones and self.board_flat_idx; may lower
    self.num_oppStones; appends sampled positions to self.board_sims.
    Returns None.
    """
    pb = self.board_opp_known.copy()
    # NOTE(review): the result of astype() is discarded. If pb is a NumPy
    # array this statement has no effect -- confirm whether
    # `pb = pb.astype(float)` was intended.
    pb.astype(float)
    pb = pb + self.basePb
    # Work out the upper bound on the opponent's stone count;
    # num_oppStones must not exceed it.
    board_innerQi = self.findInnerQi()
    num_oppStoneUpperLimit = 81 - len(np.transpose(np.nonzero(self.board_selfNow))) - len(np.transpose(np.nonzero(board_innerQi)))
    if self.num_oppStones > num_oppStoneUpperLimit:
        self.num_oppStones = num_oppStoneUpperLimit
    # The opponent cannot have a stone on one of our stones or in one of
    # our eyes, so zero those probabilities.
    pb.flat[[i for (i, x) in enumerate(self.board_selfNow.flat) if x == self.color]] = 0
    pb.flat[[i for (i, x) in enumerate(board_innerQi.flat) if x == 1]] = 0
    # Nothing left to sample from.
    if not pb.sum():
        return
    pb = pb / pb.sum()
    # NOTE(review): loop nesting below is reconstructed from a
    # whitespace-collapsed source -- confirm against the original file.
    for t in range(200):
        tmpPb = pb.copy()
        tmpGo = Position(n=9, board=self.board_selfNow, to_play=-self.color)
        # Opponent places all stones except the last one.
        for i in range(self.num_oppStones - 1):
            # Up to five attempts to find an acceptable point for this stone.
            for ntry in range(5):
                flatIdx = np.random.choice(self.board_flat_idx, 1, p=tmpPb.flat)
                action_opp = (int(flatIdx / 9), int(flatIdx % 9))
                if not tmpGo.is_move_legal(action_opp):
                    continue
                # Mark the new stone on a copy so that any remaining
                # occupancy difference after the move is a capture.
                preBoard = tmpGo.board.copy()
                preBoard[action_opp[0], action_opp[1]] = 1
                tmpGo_sub = tmpGo.play_move(action_opp)
                absDiff = np.abs(preBoard) - np.abs(tmpGo_sub.board)
                # Reject moves that change any other point's occupancy.
                if len(np.transpose(np.nonzero(absDiff))):
                    continue
                tmpGo = tmpGo_sub
                # Keep the opponent on move for the next stone.
                tmpGo.to_play = -self.color
                tmpPb.flat[flatIdx] = 0
                # NOTE(review): if every remaining probability is zero,
                # this divides by zero -- confirm this cannot happen.
                tmpPb = tmpPb / tmpPb.sum()
                break
        # The opponent's final stone.
        for q in range(10):
            flatIdx = np.random.choice(self.board_flat_idx, 1, p=tmpPb.flat)
            action_opp = (int(flatIdx / 9), int(flatIdx % 9))
            if not tmpGo.is_move_legal(action_opp):
                continue
            preBoard = tmpGo.board.copy()
            preBoard[action_opp[0], action_opp[1]] = 1
            # NOTE(review): unlike the loop above, tmpGo is overwritten
            # before the capture check, so a rejected attempt is still
            # carried into the next retry -- confirm this is intended.
            tmpGo = tmpGo.play_move(action_opp)
            absDiff = np.abs(preBoard) - np.abs(tmpGo.board)
            if len(np.transpose(np.nonzero(absDiff))):
                continue
            else:
                self.board_sims.append(tmpGo)
                break
    # NOTE(review): the triple quote below opens a string whose remainder
    # lies outside this chunk; it is kept untouched.
    """
return pv_mcts_coord return pv_mcts_action def boltzman(xs, temperature): xs = [x**(1 / temperature) for x in xs] return [x / sum(xs) for x in xs] if __name__ == '__main__': cur_dir = Path(__file__).parent.absolute() cur_dir = cur_dir / 'model' path = sorted(cur_dir.glob('*.h5'))[-1] model = load_model(str(path)) state = Position() next_action = pv_mcts_action(model, 1.0) while True: if state.is_game_over(): print(state.result_string()) break action = next_action(state) state = state.play_move(action) print(state.__str__(False)) print()