def test_ko_move(self):
    # Black captures at A9, creating a single-stone ko at B9: White may not
    # retake immediately, but may retake after two intervening moves (here,
    # two passes), which restores the start board with a new ko at A9.
    start_board = utils_test.load_board('''
        .OX......
        OX.......
    ''' + EMPTY_ROW * 7)
    start_position = Position(
        utils_test.BOARD_SIZE,
        board=start_board,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=tuple(),
        to_play=BLACK,
    )
    expected_board = utils_test.load_board('''
        X.X......
        OX.......
    ''' + EMPTY_ROW * 7)
    expected_position = Position(
        utils_test.BOARD_SIZE,
        board=expected_board,
        n=1,
        komi=6.5,
        caps=(2, 2),  # Black's capture count increments by one
        ko=coords.from_kgs(utils_test.BOARD_SIZE, 'B9'),
        recent=(PlayerMove(BLACK, coords.from_kgs(utils_test.BOARD_SIZE, 'A9')),),
        to_play=WHITE,
    )
    actual_position = start_position.play_move(
        coords.from_kgs(utils_test.BOARD_SIZE, 'A9'))
    self.assertEqualPositions(actual_position, expected_position)
    # Check that retaking ko is illegal until two intervening moves
    with self.assertRaises(go.IllegalMove):
        actual_position.play_move(
            coords.from_kgs(utils_test.BOARD_SIZE, 'B9'))
    pass_twice = actual_position.pass_move().pass_move()
    ko_delayed_retake = pass_twice.play_move(
        coords.from_kgs(utils_test.BOARD_SIZE, 'B9'))
    # After the delayed retake the board equals the start board, with the
    # new ko now banning Black's immediate recapture at A9.
    expected_position = Position(
        utils_test.BOARD_SIZE,
        board=start_board,
        n=4,
        komi=6.5,
        caps=(2, 3),
        ko=coords.from_kgs(utils_test.BOARD_SIZE, 'A9'),
        recent=(
            PlayerMove(BLACK, coords.from_kgs(utils_test.BOARD_SIZE, 'A9')),
            PlayerMove(WHITE, None),
            PlayerMove(BLACK, None),
            PlayerMove(WHITE, coords.from_kgs(utils_test.BOARD_SIZE, 'B9')),
        ),
        to_play=BLACK)
    self.assertEqualPositions(ko_delayed_retake, expected_position)
def test_move_with_capture(self):
    # Black plays B2, capturing the six surrounded White stones
    # (Black's capture count goes 1 -> 7).
    start_board = load_board(EMPTY_ROW * 5 + '''
        XXXX.....
        XOOX.....
        O.OX.....
        OOXX.....
    ''')
    start_position = Position(
        board=start_board,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=tuple(),
        to_play=BLACK,
    )
    expected_board = load_board(EMPTY_ROW * 5 + '''
        XXXX.....
        X..X.....
        .X.X.....
        ..XX.....
    ''')
    expected_position = Position(
        board=expected_board,
        n=1,
        komi=6.5,
        caps=(7, 2),
        ko=None,
        recent=(PlayerMove(BLACK, pc('B2')),),
        to_play=WHITE,
    )
    actual_position = start_position.play_move(pc('B2'))
    self.assertEqualPositions(actual_position, expected_position)
def test_is_move_suicidal(self):
    """Spot-check suicide detection on a board containing several capture shapes."""
    board = test_utils.load_board('''
        ...O.O...
        ....O....
        XO.....O.
        OXO...OXO
        O.XO.OX.O
        OXO...OOX
        XO.......
        ......XXO
        .....XOO.
    ''')
    position = Position(
        board=board,
        to_play=BLACK,
    )
    for move in parse_kgs_coords_set('E9 H5'):
        # sanity check my coordinate input
        assert position.board[move] == go.EMPTY
        self.assertTrue(position.is_move_suicidal(move), str(move))
    for move in parse_kgs_coords_set('B5 J1 A9'):
        # sanity check my coordinate input
        assert position.board[move] == go.EMPTY
        self.assertFalse(position.is_move_suicidal(move), str(move))
def test_is_move_suicidal(self):
    # E9 and H5 are points where a Black stone would have no liberties and
    # capture nothing; B5, J1 and A9 look tight but are playable.
    board = load_board('''
        ...O.O...
        ....O....
        XO.....O.
        OXO...OXO
        O.XO.OX.O
        OXO...OOX
        XO.......
        ......XXO
        .....XOO.
    ''')
    position = Position(
        board=board,
        to_play=BLACK,
    )
    suicidal_moves = pc_set('E9 H5')
    nonsuicidal_moves = pc_set('B5 J1 A9')
    for move in suicidal_moves:
        assert (position.board[move] == go.EMPTY)  # sanity check my coordinate input
        self.assertTrue(position.is_move_suicidal(move), str(move))
    for move in nonsuicidal_moves:
        assert (position.board[move] == go.EMPTY)  # sanity check my coordinate input
        self.assertFalse(position.is_move_suicidal(move), str(move))
def test_is_move_suicidal(self):
    # Same scenario as the sibling tests, using the BOARD_SIZE-aware API.
    board = utils_test.load_board('''
        ...O.O...
        ....O....
        XO.....O.
        OXO...OXO
        O.XO.OX.O
        OXO...OOX
        XO.......
        ......XXO
        .....XOO.
    ''')
    position = Position(
        utils_test.BOARD_SIZE,
        board=board,
        to_play=BLACK,
    )
    suicidal_moves = coords_from_kgs_set('E9 H5')
    nonsuicidal_moves = coords_from_kgs_set('B5 J1 A9')
    for move in suicidal_moves:
        # sanity check my coordinate input
        assert position.board[move] == go.EMPTY
        self.assertTrue(position.is_move_suicidal(move), str(move))
    for move in nonsuicidal_moves:
        # sanity check my coordinate input
        assert position.board[move] == go.EMPTY
        self.assertFalse(position.is_move_suicidal(move), str(move))
def test_move_with_capture(self):
    # Duplicate of the sibling capture test: B2 captures the six White
    # stones, raising Black's capture count from 1 to 7.
    start_board = load_board(EMPTY_ROW * 5 + '''
        XXXX.....
        XOOX.....
        O.OX.....
        OOXX.....
    ''')
    start_position = Position(
        board=start_board,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=tuple(),
        to_play=BLACK,
    )
    expected_board = load_board(EMPTY_ROW * 5 + '''
        XXXX.....
        X..X.....
        .X.X.....
        ..XX.....
    ''')
    expected_position = Position(
        board=expected_board,
        n=1,
        komi=6.5,
        caps=(7, 2),
        ko=None,
        recent=(PlayerMove(BLACK, pc('B2')),),
        to_play=WHITE,
    )
    actual_position = start_position.play_move(pc('B2'))
    self.assertEqualPositions(actual_position, expected_position)
def simulate(network, board=None, steps=20):
    '''Roll out the network's greedy policy for a number of moves.

    Prints each chosen move (as a GTP coordinate) and the final position,
    to help understand the tactic the network is pursuing.

    Args:
        network: a policy/value network exposing run(position).
        board: optional initial board array; None starts from an empty board.
        steps: number of half-moves to play out.
    '''
    pos = Position(board=board)
    # Fix: the step index and the value-head output were both unused;
    # bind them to `_` to make that explicit.
    for _ in range(steps):
        policy, _ = network.run(pos)
        best_move = np.argmax(policy)
        print('Best move', coords.to_gtp(coords.from_flat(best_move)))
        pos = pos.play_move(coords.from_flat(best_move))
    print(pos)
def test_move(self):
    # Two capture-free moves: Black C9 then White J8. Each bumps n,
    # extends `recent`, and flips to_play; caps and ko are untouched.
    start_position = Position(
        utils_test.BOARD_SIZE,
        board=TEST_BOARD,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=tuple(),
        to_play=BLACK,
    )
    expected_board = utils_test.load_board('''
        .XX....OO
        X........
    ''' + EMPTY_ROW * 7)
    expected_position = Position(
        utils_test.BOARD_SIZE,
        board=expected_board,
        n=1,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=(PlayerMove(BLACK, coords.from_kgs(utils_test.BOARD_SIZE, 'C9')),),
        to_play=WHITE,
    )
    actual_position = start_position.play_move(
        coords.from_kgs(utils_test.BOARD_SIZE, 'C9'))
    self.assertEqualPositions(actual_position, expected_position)
    expected_board2 = utils_test.load_board('''
        .XX....OO
        X.......O
    ''' + EMPTY_ROW * 7)
    expected_position2 = Position(
        utils_test.BOARD_SIZE,
        board=expected_board2,
        n=2,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=(
            PlayerMove(BLACK, coords.from_kgs(utils_test.BOARD_SIZE, 'C9')),
            PlayerMove(WHITE, coords.from_kgs(utils_test.BOARD_SIZE, 'J8')),
        ),
        to_play=BLACK,
    )
    actual_position2 = actual_position.play_move(
        coords.from_kgs(utils_test.BOARD_SIZE, 'J8'))
    self.assertEqualPositions(actual_position2, expected_position2)
def test_ko_move_mutable_board(self):
    # Same ko scenario as test_ko_move, but with mutate=True so each call
    # updates the Position in place rather than returning a fresh copy.
    start_board = load_board('''
        .OX......
        OX.......
    ''' + EMPTY_ROW * 7)
    start_position = Position(
        board=start_board,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=tuple(),
        to_play=BLACK,
    )
    expected_board = load_board('''
        X.X......
        OX.......
    ''' + EMPTY_ROW * 7)
    expected_position = Position(
        board=expected_board,
        n=1,
        komi=6.5,
        caps=(2, 2),
        ko=pc('B9'),
        recent=(PlayerMove(BLACK, pc('A9')),),
        to_play=WHITE,
    )
    actual_position = start_position.play_move(pc('A9'), mutate=True)
    self.assertEqualPositions(actual_position, expected_position)
    # Check that retaking ko is illegal until two intervening moves
    with self.assertRaises(go.IllegalMove):
        actual_position.play_move(pc('B9'), mutate=True)
    pass_twice = actual_position.pass_move(mutate=True).pass_move(mutate=True)
    ko_delayed_retake = pass_twice.play_move(pc('B9'), mutate=True)
    # The delayed retake restores the start board, with a new ko at A9.
    expected_position = Position(
        board=start_board,
        n=4,
        komi=6.5,
        caps=(2, 3),
        ko=pc('A9'),
        recent=(
            PlayerMove(BLACK, pc('A9')),
            PlayerMove(WHITE, None),
            PlayerMove(BLACK, None),
            PlayerMove(WHITE, pc('B9'))),
        to_play=BLACK,
    )
    self.assertEqualPositions(ko_delayed_retake, expected_position)
def test_ko_move_mutable_board(self):
    # Duplicate of the sibling mutable-board ko test: mutate=True operations
    # must produce the same ko bookkeeping as the copying API.
    start_board = load_board('''
        .OX......
        OX.......
    ''' + EMPTY_ROW * 7)
    start_position = Position(
        board=start_board,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=tuple(),
        to_play=BLACK,
    )
    expected_board = load_board('''
        X.X......
        OX.......
    ''' + EMPTY_ROW * 7)
    expected_position = Position(
        board=expected_board,
        n=1,
        komi=6.5,
        caps=(2, 2),
        ko=pc('B9'),
        recent=(PlayerMove(BLACK, pc('A9')),),
        to_play=WHITE,
    )
    actual_position = start_position.play_move(pc('A9'), mutate=True)
    self.assertEqualPositions(actual_position, expected_position)
    # Check that retaking ko is illegal until two intervening moves
    with self.assertRaises(go.IllegalMove):
        actual_position.play_move(pc('B9'), mutate=True)
    pass_twice = actual_position.pass_move(mutate=True).pass_move(
        mutate=True)
    ko_delayed_retake = pass_twice.play_move(pc('B9'), mutate=True)
    expected_position = Position(
        board=start_board,
        n=4,
        komi=6.5,
        caps=(2, 3),
        ko=pc('A9'),
        recent=(PlayerMove(BLACK, pc('A9')),
                PlayerMove(WHITE, None),
                PlayerMove(BLACK, None),
                PlayerMove(WHITE, pc('B9'))),
        to_play=BLACK,
    )
    self.assertEqualPositions(ko_delayed_retake, expected_position)
def replay_sgf(sgf_contents):
    '''
    Wrapper for sgf files, returning go.PositionWithContext instances.

    It does NOT return the very final position, as there is no follow up.
    To get the final position, call pwc.position.play_move(pwc.next_move)
    on the last PositionWithContext returned.

    Example usage:
    with open(filename) as f:
        for position_w_context in replay_sgf(f.read()):
            print(position_w_context.position)
    '''
    collection = sgf.parse(sgf_contents)
    game = collection.children[0]
    props = game.root.properties
    # GM property identifies the game type; 1 means Go.
    assert int(sgf_prop(props.get('GM', ['1']))) == 1, "Not a Go SGF!"

    komi = 0
    # Fix: compare against None with `is not` (PEP 8), not `!=`.
    if props.get('KM') is not None:
        komi = float(sgf_prop(props.get('KM')))
    result = utils.parse_game_result(sgf_prop(props.get('RE')))

    pos = Position(komi=komi)
    current_node = game.root
    # Stop one node early: the final node has no follow-up move to yield.
    while pos is not None and current_node.next is not None:
        pos = handle_node(pos, current_node)
        maybe_correct_next(pos, current_node.next)
        next_move = get_next_move(current_node)
        yield PositionWithContext(pos, next_move, result)
        current_node = current_node.next
def replay_sgf(sgf_contents):
    '''Yield a PositionWithContext for each node of an SGF game record.'''
    collection = sgf.parse(sgf_contents)  # parse into an object tree for easy traversal
    game = collection.children[0]
    prop = game.root.properties
    # GM property identifies the game type; 1 means Go.
    assert int(sgf_prop(prop.get('GM', ['1']))) == 1, "这不是围棋棋谱!"

    komi = 0  # default komi when the record carries none
    # Fix: compare against None with `is not` (PEP 8), not `!=`.
    if prop.get('KM') is not None:
        komi = float(sgf_prop(prop.get('KM')))
    metadata = GameMetadata(
        result=sgf_prop(prop.get('RE')),              # game result
        handicap=int(sgf_prop(prop.get('HA', [0]))),  # handicap stones
        board_size=int(sgf_prop(prop.get('SZ'))))     # board size
    go.set_board_size(metadata.board_size)

    pos = Position(komi=komi)
    current_node = game.root
    # Walk the game's node list, converting SGF letter coordinates into
    # successive numeric positions. (Fix: removed a dead trailing `pass`.)
    while pos is not None and current_node is not None:
        pos = handle_node(pos, current_node)
        maybe_correct_next(pos, current_node.next)
        next_move = get_next_move(current_node)
        yield PositionWithContext(pos, next_move, metadata)
        current_node = current_node.next
def add_stones(pos, black_stones_added, white_stones_added):
    """Return a copy of `pos` with extra black and white stones placed.

    All other position state (move number, komi, captures, ko, history,
    player to move) is carried over unchanged.
    """
    augmented_board = np.copy(pos.board)
    go.place_stones(augmented_board, go.BLACK, black_stones_added)
    go.place_stones(augmented_board, go.WHITE, white_stones_added)
    return Position(
        board=augmented_board,
        n=pos.n,
        komi=pos.komi,
        caps=pos.caps,
        ko=pos.ko,
        recent=pos.recent,
        to_play=pos.to_play,
    )
def replay_sgf(sgf_contents):
    '''
    Wrapper for sgf files, exposing contents as position_w_context instances

    with open(filename) as f:
        for position_w_context in replay_sgf(f.read()):
            print(position_w_context.position)
    '''
    collection = sgf.parse(sgf_contents)
    game = collection.children[0]
    props = game.root.properties
    assert int(sgf_prop(props.get('GM', ['1']))) == 1, "Not a Go SGF!"

    komi = 0
    # Fix: compare against None with `is not` (PEP 8), not `!=`.
    if props.get('KM') is not None:
        komi = float(sgf_prop(props.get('KM')))
    metadata = GameMetadata(
        result=sgf_prop(props.get('RE')),
        handicap=int(sgf_prop(props.get('HA', [0]))),
        board_size=int(sgf_prop(props.get('SZ'))))
    go.set_board_size(metadata.board_size)

    pos = Position(komi=komi)
    current_node = game.root
    # Walk every node, yielding the position before each recorded move.
    while pos is not None and current_node is not None:
        pos = handle_node(pos, current_node)
        maybe_correct_next(pos, current_node.next)
        next_move = get_next_move(current_node)
        yield PositionWithContext(pos, next_move, metadata)
        current_node = current_node.next
def action(self): """计算落子并返回坐标""" # 1、模拟完全信息棋面 with utils.logged_timer("simulation"): self.board_sims = [] self.simOppLatest() print('num_sims: ', len(self.board_sims)) #print('one of sim:\n',self.board_sims[-1]) if len(self.board_sims) == 0: # 若模拟对手棋面失败,仅输入己方棋面信息 tmpGo = Position(n=9, board=self.board_selfNow, to_play=self.color) self.board_sims.append(tmpGo) # 2、计算每个可行位置的总得分 with utils.logged_timer("calculation"): pbs, vs = self.scoreNet.run_many(self.board_sims) scoreBoard = np.sum(pbs, axis=0) # 自己的位置得分设为零 selfPlaces = np.transpose(np.nonzero(self.board_selfNow)) for sp in selfPlaces: scoreBoard[sp[0] * 9 + sp[1]] = 0 # 自己内部的气不准下 board_innerQi = self.findInnerQi() scoreBoard.flat[[ i for (i, x) in enumerate(board_innerQi.flat) if x == 1 ]] = 0 # illegal的位置得分设为零 scoreBoard.flat[[ i for (i, x) in enumerate(self.illegalBoard.flat) if x == 1 ]] = 0 # 不主动pass scoreBoard = scoreBoard[:81] #print('scoreBoard:\n',scoreBoard) # pass的情况 if scoreBoard.sum() == 0: action = [-1, -1] self.tryAction = action else: flatMaxIdx = np.argmax(scoreBoard) action = [int(flatMaxIdx / 9), int(flatMaxIdx % 9)] self.tryAction = action with closing(shelve.open('buffer', 'c')) as shelf: shelf['color'] = self.color shelf['board_selfNow'] = self.board_selfNow shelf['board_opp_known'] = self.board_opp_known shelf['num_oppStones'] = self.num_oppStones return action
def perturb_position(pos, new_board=None, memodict=None):
    '''Return a Position wrapping `new_board`, carrying over pos's bookkeeping.

    Fix: `memodict={}` was a mutable default argument (shared across every
    call). It is unused here, but the parameter is kept (defaulting to None)
    for interface compatibility with existing callers.

    Args:
        pos: the Position whose metadata (n, komi, caps, ko, recent,
            board_deltas, to_play) is copied over.
        new_board: board array for the perturbed position; defaults to a
            copy of pos.board.
        memodict: unused; retained for backward compatibility.
    '''
    if new_board is None:
        new_board = np.copy(pos.board)
    # Liberty bookkeeping must be rebuilt from scratch for the edited board.
    new_lib_tracker = LibertyTracker.from_board(new_board)
    return Position(new_board, pos.n, pos.komi, pos.caps, new_lib_tracker,
                    pos.ko, pos.recent, pos.board_deltas, pos.to_play)
def test_scoring(self):
    # Area scoring minus komi: Black leads by 1.5 here; adding one more
    # black stone at A9 shifts the margin to 2.5.
    board = load_board('''
        .XX......
        OOXX.....
        OOOX...X.
        OXX......
        OOXXXXXX.
        OOOXOXOXX
        .O.OOXOOX
        .O.O.OOXX
        ......OOO
    ''')
    position = Position(
        board=board,
        n=54,
        komi=6.5,
        caps=(2, 5),
        ko=None,
        recent=tuple(),
        to_play=BLACK,
    )
    expected_score = 1.5
    self.assertEqual(position.score(), expected_score)
    board = load_board('''
        XXX......
        OOXX.....
        OOOX...X.
        OXX......
        OOXXXXXX.
        OOOXOXOXX
        .O.OOXOOX
        .O.O.OOXX
        ......OOO
    ''')
    position = Position(
        board=board,
        n=55,
        komi=6.5,
        caps=(2, 5),
        ko=None,
        recent=tuple(),
        to_play=WHITE,
    )
    expected_score = 2.5
    self.assertEqual(position.score(), expected_score)
def test_move(self):
    # Two capture-free moves: Black C9 then White J8. Each bumps n,
    # extends `recent` with a PlayerMove, and flips to_play.
    start_position = Position(
        board=TEST_BOARD,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=tuple(),
        to_play=BLACK,
    )
    expected_board = test_utils.load_board('''
        .XX....OO
        X........
    ''' + EMPTY_ROW * 7)
    expected_position = Position(
        board=expected_board,
        n=1,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=(PlayerMove(BLACK, parse_kgs_coords('C9')),),
        to_play=WHITE,
    )
    actual_position = start_position.play_move(parse_kgs_coords('C9'))
    self.assertEqualPositions(actual_position, expected_position)
    expected_board2 = test_utils.load_board('''
        .XX....OO
        X.......O
    ''' + EMPTY_ROW * 7)
    expected_position2 = Position(
        board=expected_board2,
        n=2,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=(PlayerMove(BLACK, parse_kgs_coords('C9')),
                PlayerMove(WHITE, parse_kgs_coords('J8'))),
        to_play=BLACK,
    )
    actual_position2 = actual_position.play_move(parse_kgs_coords('J8'))
    self.assertEqualPositions(actual_position2, expected_position2)
def test_move(self):
    # Legacy API variant: play_move takes (color, coord) and `recent`
    # stores bare coordinates rather than PlayerMove tuples.
    start_position = Position(
        board=TEST_BOARD,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=tuple(),
        to_play=BLACK,
    )
    expected_board = load_board('''
        .XX....OO
        X........
    ''' + EMPTY_ROW * 7)
    expected_position = Position(
        board=expected_board,
        n=1,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=(pc('C9'),),
        to_play=WHITE,
    )
    actual_position = start_position.play_move(BLACK, pc('C9'))
    self.assertEqualPositions(actual_position, expected_position)
    expected_board2 = load_board('''
        .XX....OO
        X.......O
    ''' + EMPTY_ROW * 7)
    expected_position2 = Position(
        board=expected_board2,
        n=2,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=(pc('C9'), pc('J8')),
        to_play=BLACK,
    )
    actual_position2 = actual_position.play_move(WHITE, pc('J8'))
    self.assertEqualPositions(actual_position2, expected_position2)
def test_legal_moves(self):
    # `illegal_moves` are suicides for the player to move; `legal_moves`
    # includes pass (None). The bulk all_legal_moves() result must agree
    # with per-move checks, and legality must be color-symmetric.
    board = test_utils.load_board('''
        .O.O.XOX.
        O..OOOOOX
        ......O.O
        OO.....OX
        XO.....X.
        .O.......
        OX.....OO
        XX...OOOX
        .....O.X.
    ''')
    position = Position(board=board, to_play=BLACK)
    illegal_moves = parse_kgs_coords_set('A9 E9 J9')
    legal_moves = parse_kgs_coords_set('A4 G1 J1 H7') | {None}
    for move in illegal_moves:
        with self.subTest(type='illegal', move=move):
            self.assertFalse(position.is_move_legal(move))
    for move in legal_moves:
        with self.subTest(type='legal', move=move):
            self.assertTrue(position.is_move_legal(move))
    # check that the bulk legal test agrees with move-by-move illegal test.
    bulk_legality = position.all_legal_moves()
    for i, bulk_legal in enumerate(bulk_legality):
        with self.subTest(type='bulk', move=unflatten_coords(i)):
            self.assertEqual(
                bulk_legal, position.is_move_legal(unflatten_coords(i)))
    # flip the colors and check that everything is still (il)legal
    position = Position(board=-board, to_play=WHITE)
    for move in illegal_moves:
        with self.subTest(type='illegal', move=move):
            self.assertFalse(position.is_move_legal(move))
    for move in legal_moves:
        with self.subTest(type='legal', move=move):
            self.assertTrue(position.is_move_legal(move))
    bulk_legality = position.all_legal_moves()
    for i, bulk_legal in enumerate(bulk_legality):
        with self.subTest(type='bulk', move=unflatten_coords(i)):
            self.assertEqual(
                bulk_legal, position.is_move_legal(unflatten_coords(i)))
def replay_position(position):
    '''Yield a PositionWithContext for every move in `position`'s history.

    Assumes an empty start position! (i.e. no handicap, and history must be
    exhaustive.)

    for position_w_context in replay_position(position):
        print(position_w_context.position)
    '''
    assert position.n == len(position.recent), "Position history is incomplete"
    metadata = GameMetadata(
        result=position.result(),
        handicap=0,
        board_size=position.board.shape[0],
    )
    go.set_board_size(metadata.board_size)
    replayed = Position(komi=position.komi)
    for color, next_move in position.recent:
        # Yield the position *before* next_move is applied.
        yield PositionWithContext(replayed, next_move, metadata)
        replayed = replayed.play_move(next_move, color=color)
    # return the original position, with unknown next move
    yield PositionWithContext(replayed, None, metadata)
def test_passing(self):
    # Passing advances the move counter, clears any active ko, records a
    # None move in `recent`, and flips to_play; board and caps are untouched.
    start_position = Position(
        board=TEST_BOARD,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=pc('A1'),
        recent=tuple(),
        to_play=BLACK,
    )
    expected_position = Position(
        board=TEST_BOARD,
        n=1,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=(PlayerMove(BLACK, None),),
        to_play=WHITE,
    )
    pass_position = start_position.pass_move()
    self.assertEqualPositions(pass_position, expected_position)
def test_flipturn(self):
    """flip_playerturn() swaps to_play and lifts any ko ban, touching nothing else."""
    start_position = Position(
        board=TEST_BOARD,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=pc('A1'),
        recent=tuple(),
        to_play=BLACK,
    )
    flipped = start_position.flip_playerturn()
    expected_position = Position(
        board=TEST_BOARD,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=None,  # flipping the turn clears the ko
        recent=tuple(),
        to_play=WHITE,
    )
    self.assertEqualPositions(flipped, expected_position)
def test_passing(self):
    # Duplicate of the sibling passing test: a pass bumps n, clears ko,
    # records PlayerMove(BLACK, None), and hands the turn to White.
    start_position = Position(
        board=TEST_BOARD,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=pc('A1'),
        recent=tuple(),
        to_play=BLACK,
    )
    expected_position = Position(
        board=TEST_BOARD,
        n=1,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=(PlayerMove(BLACK, None),),
        to_play=WHITE,
    )
    pass_position = start_position.pass_move()
    self.assertEqualPositions(pass_position, expected_position)
def test_flipturn(self):
    # flip_playerturn() swaps to_play and clears any ko; n, board, caps and
    # history are unchanged. (BOARD_SIZE-aware API variant.)
    start_position = Position(
        utils_test.BOARD_SIZE,
        board=TEST_BOARD,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=coords.from_kgs(utils_test.BOARD_SIZE, 'A1'),
        recent=tuple(),
        to_play=BLACK,
    )
    expected_position = Position(
        utils_test.BOARD_SIZE,
        board=TEST_BOARD,
        n=0,
        komi=6.5,
        caps=(1, 2),
        ko=None,
        recent=tuple(),
        to_play=WHITE,
    )
    flip_position = start_position.flip_playerturn()
    self.assertEqualPositions(flip_position, expected_position)
def replay_position(position):
    '''
    Wrapper for a go.Position which replays its history.
    Assumes an empty start position! (i.e. no handicap, and history must be
    exhaustive.)

    for position_w_context in replay_position(position):
        print(position_w_context.position)
    '''
    # History must cover every move played, or the replay cannot reach
    # `position`.
    assert position.n == len(position.recent), "Position history is incomplete"
    metadata = GameMetadata(
        result=position.result(),
        handicap=0,
        board_size=position.board.shape[0]
    )
    go.set_board_size(metadata.board_size)
    pos = Position(komi=position.komi)
    for player_move in position.recent:
        color, next_move = player_move
        # Yield the position *before* next_move is applied.
        yield PositionWithContext(pos, next_move, metadata)
        pos = pos.play_move(next_move, color=color)
    # return the original position, with unknown next move
    yield PositionWithContext(pos, None, metadata)
def test_legal_moves(self):
    # For each empty point, moves outside the per-color legal set must
    # raise IllegalMove; pass (None) is always legal for either side.
    board = load_board('''
        .XXXXXXXO
        XX.OOOOO.
        OOOOOOOOO
        XXXXXXXX.
        OOOOOOOOO
        XXXXXXXXX
        XXXXXXXXX
        XXXXXXXXX
        XXXXXXXX.
    ''')
    position = Position(
        board=board,
        n=0,
        komi=6.5,
        caps=(0, 0),
        ko=pc('J8'),
        recent=tuple(),
        to_play=BLACK,
    )
    empty_spots = pc_set('A9 C8 J8 J6 J1')
    B_legal_moves = pc_set('A9 C8 J6')
    for move in empty_spots:
        if move not in B_legal_moves:
            with self.assertRaises(go.IllegalMove):
                position.play_move(move)
        else:
            position.play_move(move)
    # pass should also be legal
    position.play_move(None)
    pass_position = position.pass_move()
    W_legal_moves = pc_set('C8 J8 J6 J1')
    for move in empty_spots:
        if move not in W_legal_moves:
            with self.assertRaises(go.IllegalMove):
                pass_position.play_move(move)
        else:
            pass_position.play_move(move)
    # pass should also be legal
    pass_position.play_move(None)
def main(argv): network = dual_net.DualNetwork('minigo-models/models/000737-fury') # add path to model board = np.zeros([N, N], dtype=np.int8) pos_w_con = list(replay_sgf_file('go_puzzles/10458/10494.sgf')) # Loading a puzzle from go_puzzles folder board += pos_w_con[0].position.board # Setting up the board # Let's add new pieces from another puzzle pos_w_con = list(replay_sgf_file('go_puzzles/14511/14515.sgf')) board += pos_w_con[0].position.board # Load the board position pos = Position(board = board) print(pos) # Generate saliency maps, see results folder play_network(network, board)
def test_scoring(self):
    # Area scoring minus komi (BOARD_SIZE-aware API): Black leads by 1.5,
    # then adding one more black stone at A9 shifts the margin to 2.5.
    board = utils_test.load_board('''
        .XX......
        OOXX.....
        OOOX...X.
        OXX......
        OOXXXXXX.
        OOOXOXOXX
        .O.OOXOOX
        .O.O.OOXX
        ......OOO
    ''')
    position = Position(
        utils_test.BOARD_SIZE,
        board=board,
        n=54,
        komi=6.5,
        caps=(2, 5),
        ko=None,
        recent=tuple(),
        to_play=BLACK,
    )
    expected_score = 1.5
    self.assertEqual(position.score(), expected_score)
    board = utils_test.load_board('''
        XXX......
        OOXX.....
        OOOX...X.
        OXX......
        OOXXXXXX.
        OOOXOXOXX
        .O.OOXOOX
        .O.O.OOXX
        ......OOO
    ''')
    position = Position(
        utils_test.BOARD_SIZE,
        board=board,
        n=55,
        komi=6.5,
        caps=(2, 5),
        ko=None,
        recent=tuple(),
        to_play=WHITE,
    )
    expected_score = 2.5
    self.assertEqual(position.score(), expected_score)
def test_is_move_reasonable(self):
    """Reasonable moves are kept; eye-filling / hopeless moves are pruned."""
    board = load_board('''
        .XXOOOXXX
        X.XO.OX.X
        XXXOOOXX.
        ...XXX..X
        XXXX.....
        OOOX....O
        X.OXX.OO.
        .XO.X.O.O
        XXO.X.OO.
    ''')
    position = Position(
        board=board,
        to_play=BLACK,
    )
    for move in pc_set('E8 B3'):
        self.assertTrue(is_move_reasonable(position, move), str(move))
    for move in pc_set('A9 B8 H8 J7 A2 J3 H2 J1'):
        self.assertFalse(is_move_reasonable(position, move), str(move))
class Referee():
    """Referee for 9x9 Go: validates moves, tracks captures, reports the result."""

    def __init__(self):
        # NOTE(review): komi=3.25 looks rule-set specific — confirm it
        # matches the target competition's rules.
        self.go = Position(n=9, komi=3.25)

    def action(self, coord):
        """Apply one move.

        Args:
            coord: [row, col] of the stone, or [-1, -1] for a pass.

        Returns:
            (legal, takes, winner): `legal` says whether the move was
            accepted, `takes` lists captured stones' coordinates, and
            `winner` is 0 while undecided, 1 for a win, -1 for a loss.
        """
        # Pass move.
        if coord == [-1, -1]:
            self.go = self.go.pass_move()
            # Two consecutive passes end the game; score it.
            winner = 0
            if self.go.is_game_over():
                winner = self.go.result()
                print(self.go.result_string())
            # Fix: return a tuple for consistency with the other branches
            # (this branch previously returned a list).
            return (True, [], winner)
        # Reject illegal moves.
        if not self.go.is_move_legal(tuple(coord)):
            return (False, [], 0)
        # Snapshot the board with the new stone pre-placed, so captures show
        # up as occupancy differences after the move is actually played.
        preBoard = self.go.board.copy()
        preBoard[coord[0], coord[1]] = 1
        # Play the move.
        self.go = self.go.play_move(tuple(coord))
        # Any point occupied before but empty now was captured.
        absDiff = np.abs(preBoard) - np.abs(self.go.board)
        takes = np.transpose(np.nonzero(absDiff))
        return (True, takes, 0)
# NOTE(review): this chunk begins mid-definition — the two returns below are
# the tails of nested factory function(s) whose headers are not visible here.
# Indentation is reconstructed and must be confirmed against the full file.
        return pv_mcts_coord

    return pv_mcts_action


def boltzman(xs, temperature):
    # Boltzmann (temperature) rescaling: sharpen (T < 1) or flatten (T > 1)
    # the weights, then renormalize so they sum to 1.
    xs = [x**(1 / temperature) for x in xs]
    return [x / sum(xs) for x in xs]


if __name__ == '__main__':
    # Self-play demo: load the newest .h5 model from ./model and play out a
    # game with the PV-MCTS action chooser until the game ends.
    cur_dir = Path(__file__).parent.absolute()
    cur_dir = cur_dir / 'model'
    path = sorted(cur_dir.glob('*.h5'))[-1]
    model = load_model(str(path))
    state = Position()
    next_action = pv_mcts_action(model, 1.0)
    while True:
        if state.is_game_over():
            print(state.result_string())
            break
        action = next_action(state)
        state = state.play_move(action)
        print(state.__str__(False))
        print()
def __init__(self):
    # 9x9 board with komi 3.25.
    # NOTE(review): komi=3.25 looks rule-set specific — confirm it matches
    # the target competition's rules.
    self.go = Position(n=9, komi=3.25)
def main(argv):
    """Load a model, compose a board from puzzle SGFs, and run saliency analysis."""
    network = dual_net.DualNetwork(
        'minigo-models/models/000737-fury')  # add path to model
    board = np.zeros([N, N], dtype=np.int8)
    # Overlay the first positions of two puzzle SGFs onto one board.
    pos_w_con = list(replay_sgf_file('go_puzzles/10458/10494.sgf'))
    board += pos_w_con[0].position.board
    pos_w_con = list(replay_sgf_file('go_puzzles/14511/14515.sgf'))
    board += pos_w_con[0].position.board
    # Fix: removed a large bank of commented-out puzzle loads and manual
    # stone-editing experiments (dead code duplicating the loads above),
    # plus a redundant duplicate `pos = Position(board=board)` assignment.
    pos = Position(board=board)
    print(pos)
    # simulate(network, board, steps=10)  # alternate analysis entry point
    play_network(network, board)
def simOppLatest(self):
    """Simulate boards containing the opponent's hidden stones.

    All stones before the opponent's latest move are sampled directly from
    the prior without recording intermediate steps; only the latest stone
    is placed last, and only capture-free samples are kept.
    """
    pb = self.board_opp_known.copy()
    # NOTE(review): the astype result is discarded — this line is a no-op;
    # likely intended `pb = pb.astype(float)`.
    pb.astype(float)
    pb = pb + self.basePb
    # Cap the opponent stone count: it cannot exceed the points left after
    # removing our own stones and our internal liberties (eyes).
    board_innerQi = self.findInnerQi()
    num_oppStoneUpperLimit = 81 - len(np.transpose(np.nonzero(self.board_selfNow))) - len(np.transpose(np.nonzero(board_innerQi)))
    if self.num_oppStones > num_oppStoneUpperLimit:
        self.num_oppStones = num_oppStoneUpperLimit
    # The opponent cannot occupy our stones' points or our eye points.
    pb.flat[[i for (i, x) in enumerate(self.board_selfNow.flat) if x == self.color]] = 0
    pb.flat[[i for (i, x) in enumerate(board_innerQi.flat) if x == 1]] = 0
    if not pb.sum():
        return
    pb = pb / pb.sum()  # normalize into a sampling distribution
    for t in range(200):  # number of simulated boards to attempt
        tmpPb = pb.copy()
        tmpGo = Position(n=9, board=self.board_selfNow, to_play=-self.color)
        # Place all but the opponent's final stone by sampling from tmpPb.
        for i in range(self.num_oppStones - 1):
            for ntry in range(5):  # retry a few times on rejected samples
                flatIdx = np.random.choice(self.board_flat_idx, 1, p=tmpPb.flat)
                action_opp = (int(flatIdx / 9), int(flatIdx % 9))
                if not tmpGo.is_move_legal(action_opp):
                    continue
                preBoard = tmpGo.board.copy()
                preBoard[action_opp[0], action_opp[1]] = 1
                tmpGo_sub = tmpGo.play_move(action_opp)
                # Reject samples that capture stones (occupancy changed).
                absDiff = np.abs(preBoard) - np.abs(tmpGo_sub.board)
                if len(np.transpose(np.nonzero(absDiff))):
                    continue
                tmpGo = tmpGo_sub
                tmpGo.to_play = -self.color  # keep it the opponent's turn
                tmpPb.flat[flatIdx] = 0  # sample without replacement
                tmpPb = tmpPb / tmpPb.sum()
                break
        # The opponent's final (latest) stone.
        for q in range(10):
            flatIdx = np.random.choice(self.board_flat_idx, 1, p=tmpPb.flat)
            action_opp = (int(flatIdx / 9), int(flatIdx % 9))
            if not tmpGo.is_move_legal(action_opp):
                continue
            preBoard = tmpGo.board.copy()
            preBoard[action_opp[0], action_opp[1]] = 1
            tmpGo = tmpGo.play_move(action_opp)
            absDiff = np.abs(preBoard) - np.abs(tmpGo.board)
            if len(np.transpose(np.nonzero(absDiff))):
                continue
            else:
                self.board_sims.append(tmpGo)
                break
    # NOTE(review): the trailing triple-quote below opens a block that
    # continues past this chunk; kept verbatim.
    """
def test_legal_moves(self):
    # Duplicate of the sibling legality test: per-move checks, agreement
    # with the bulk all_legal_moves() vector, and color symmetry.
    board = test_utils.load_board('''
        .O.O.XOX.
        O..OOOOOX
        ......O.O
        OO.....OX
        XO.....X.
        .O.......
        OX.....OO
        XX...OOOX
        .....O.X.
    ''')
    position = Position(board=board, to_play=BLACK)
    illegal_moves = parse_kgs_coords_set('A9 E9 J9')
    legal_moves = parse_kgs_coords_set('A4 G1 J1 H7') | {None}
    for move in illegal_moves:
        with self.subTest(type='illegal', move=move):
            self.assertFalse(position.is_move_legal(move))
    for move in legal_moves:
        with self.subTest(type='legal', move=move):
            self.assertTrue(position.is_move_legal(move))
    # check that the bulk legal test agrees with move-by-move illegal test.
    bulk_legality = position.all_legal_moves()
    for i, bulk_legal in enumerate(bulk_legality):
        with self.subTest(type='bulk', move=unflatten_coords(i)):
            self.assertEqual(bulk_legal,
                             position.is_move_legal(unflatten_coords(i)))
    # flip the colors and check that everything is still (il)legal
    position = Position(board=-board, to_play=WHITE)
    for move in illegal_moves:
        with self.subTest(type='illegal', move=move):
            self.assertFalse(position.is_move_legal(move))
    for move in legal_moves:
        with self.subTest(type='legal', move=move):
            self.assertTrue(position.is_move_legal(move))
    bulk_legality = position.all_legal_moves()
    for i, bulk_legal in enumerate(bulk_legality):
        with self.subTest(type='bulk', move=unflatten_coords(i)):
            self.assertEqual(bulk_legal,
                             position.is_move_legal(unflatten_coords(i)))
def play_network(network, board=None):
    '''Generate perturbation-based saliency maps for a board position.

    Each occupied point is lifted in turn, the network is re-run, and the
    change in value (atariV), policy (atariP), Q (delQ) and an
    entropy-weighted policy drop (heatmap) is recorded and plotted.
    '''
    pos = Position(board=board)
    policy, V = network.run(pos)
    best_move = np.argmax(policy)
    print("Best Move is", coords.to_gtp(coords.from_flat(best_move)))
    p = np.max(policy)
    player = get_mcts_player(network, pos)
    node = player.root
    old_Q = node.child_Q[best_move]
    # Fix: np.float was deprecated in NumPy 1.20 and removed in 1.24; use
    # the builtin float. (Also removed an unused original_moves dict and a
    # duplicate heatmap initialization.)
    atariV = np.zeros([N, N], dtype=float)
    atariP = np.zeros([N, N], dtype=float)
    delQ = np.zeros([N, N], dtype=float)
    heatmap = np.zeros([N, N], dtype=float)
    for i in range(N):
        for j in range(N):
            if board[i, j] == 1 or board[i, j] == -1:
                print(i, j)
                print("---------------------")
                new_board = np.copy(board)
                new_board[i, j] = 0  # lift the stone at (i, j)
                new_pos = perturb_position(pos, new_board)
                new_policy, new_V = network.run(new_pos)
                new_p = new_policy[best_move]
                # NOTE(review): the MCTS player is rebuilt from the
                # *unperturbed* pos, so new_Q tracks old_Q — possibly this
                # was meant to use new_pos; confirm intent before changing.
                player = get_mcts_player(network, pos)
                node = player.root
                new_Q = node.child_Q[best_move]
                atariV[i, j] = 0.5*((V - new_V)**2)
                atariP[i, j] = 0.5*np.linalg.norm(policy - new_policy)
                dP = p - new_p
                dQ = old_Q - new_Q
                K = cross_entropy(policy, new_policy, best_move)
                if dP > 0:
                    heatmap[i, j] = 2*dP/(1 + dP*K)
                if dQ > 0:
                    delQ[i, j] = dQ
    # Normalize to [0, 1] for display.
    atariV = (atariV - np.min(atariV))/(np.max(atariV) - np.min(atariV))
    atariP = (atariP - np.min(atariP))/(np.max(atariP) - np.min(atariP))
    frame = np.zeros((N, N, 3))
    frame = saliency_combine(atariV, frame, blur=256, channel=2)
    frame = saliency_combine(atariP, frame, blur=256, channel=0)
    plt.figure(1)
    plt.imshow(atariV, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'atariV.png')
    plt.show()
    plt.figure(2)
    plt.imshow(atariP, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'atariP.png')
    plt.show()
    plt.figure(3)
    plt.imshow(frame)
    plt.savefig(save_path + 'atari.png')
    plt.show()
    plt.figure(4)
    plt.imshow(delQ, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'deltaQ.png')
    plt.show()
    plt.figure(5)
    plt.imshow(heatmap, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'entropy.png')
    plt.show()
def play_mcts(network, board=None):
    """Perturbation saliency using the MCTS visit distribution instead of the raw policy.

    For each occupied point, the stone is lifted, MCTS is re-run, and the
    drop in the best move's visit share is accumulated into a heatmap.
    Returns the last MCTS player instance.
    """
    pos = Position(board=board)
    player = get_mcts_player(network, pos)
    node = player.root
    children = node.rank_children()
    soft_n = node.child_N / max(1, sum(node.child_N))
    original_moves = {}
    # Fix: np.float was deprecated in NumPy 1.20 and removed in 1.24; use
    # the builtin float.
    heatmap = np.zeros((N, N), dtype=float)
    a_b = None
    # Record visit shares for every visited child of the root.
    for i in children:
        if node.child_N[i] == 0:
            break
        if a_b is None:
            a_b = coords.from_flat(i)
        original_moves[coords.to_gtp(coords.from_flat(i))] = soft_n[i]
    a_b = player.pick_move()
    a_b_coords = a_b
    a_b = coords.to_gtp(a_b)
    print(original_moves)
    print("best action: ", a_b)
    print(node.position)
    p = original_moves[a_b]
    print(p)
    for i in range(N):
        for j in range(N):
            if board[i][j] == -1 or board[i][j] == 1:
                new_board = np.copy(board)
                new_board[i, j] = 0  # lift the stone at (i, j)
                new_pos = perturb_position(pos, new_board)
                if new_pos.is_move_legal(a_b_coords):
                    player = get_mcts_player(network, new_pos)
                    node = player.root
                    print(node.position)
                    new_moves = {}
                    children = node.rank_children()
                    soft_n = node.child_N / max(1, sum(node.child_N))
                    for ch in children:
                        if node.child_N[ch] == 0:
                            break
                        new_moves[coords.to_gtp(coords.from_flat(ch))] = soft_n[ch]
                    new_a_b = player.pick_move()
                    new_a_b = coords.to_gtp(new_a_b)
                    print("---------------------")
                    if a_b in new_moves:
                        new_p = new_moves[a_b]
                    else:
                        new_p = 0.
                    print("New best move", new_a_b)
                    print("p", new_p)
                    print("------------------")
                    K = cross_entropy_mcts(original_moves, new_moves, a_b)
                    if K == -1:
                        print("index", i, j)
                        # Sentinel; replaced by the map maximum below.
                        heatmap[i, j] = -1.0
                        continue
                    dP = p - new_p
                    if dP > 0:
                        heatmap[i, j] = 2.0*dP/(1. + dP*K)
                    else:
                        heatmap[i, j] = -1.0
    heatmap[heatmap == -1] = np.max(heatmap)
    heatmap[heatmap < np.max(heatmap)/1.5] = 0  # keep only the strongest responses
    plt.imshow(heatmap, cmap='jet')
    plt.colorbar()
    plt.show()
    return player