def test_parsing_9x9(self):
    """Check SGF, KGS and pygtp conversions against known board coordinates."""
    # SGF text -> coordinate, including the empty string for a pass.
    for sgf, expected in (('aa', (0, 0)), ('ac', (2, 0)),
                          ('ca', (0, 2)), ('', None)):
        self.assertEqual(coords.from_sgf(sgf), expected)
    self.assertEqual(coords.to_sgf(None), '')

    # SGF round trips, starting from text and from a coordinate.
    for sgf in ('aa', 'sa'):
        self.assertEqual(sgf, coords.to_sgf(coords.from_sgf(sgf)))
    self.assertEqual((1, 17), coords.from_sgf(coords.to_sgf((1, 17))))

    for kgs, expected in (('A1', (8, 0)), ('A9', (0, 0)),
                          ('C2', (7, 2)), ('J2', (7, 8))):
        self.assertEqual(coords.from_kgs(kgs), expected)

    for gtp, expected in (((1, 1), (8, 0)), ((1, 9), (0, 0)),
                          ((3, 2), (7, 2))):
        self.assertEqual(coords.from_pygtp(gtp), expected)
    for coord, expected in (((8, 0), (1, 1)), ((0, 0), (1, 9)),
                            ((7, 2), (3, 2))):
        self.assertEqual(coords.to_pygtp(coord), expected)

    for coord, expected in (((0, 8), 'J9'), ((8, 0), 'A1')):
        self.assertEqual(coords.to_kgs(coord), expected)
def describe(self):
    """Return a text report of this node's search statistics.

    The report contains the node's Q, the most visited path, a column
    header, and one row for each of the 15 children that rank highest by
    (visit count, action score).

    Returns:
        The report as a single string.
    """
    # Local import: this block did not previously depend on a module-level
    # `np` name.
    import numpy as np

    sort_order = list(range(go.N * go.N + 1))
    sort_order.sort(
        key=lambda i: (self.child_N[i], self.child_action_score[i]),
        reverse=True)

    # A node without visits has sum(child_N) == 0 and a prior entry may be
    # zero; guard both divisions so the report shows 0 instead of nan/inf.
    soft_n = self.child_N / max(1, sum(self.child_N))
    prior = self.child_prior
    p_delta = soft_n - prior
    p_rel = np.divide(p_delta, prior, out=np.zeros_like(p_delta),
                      where=prior != 0)

    # Dump out some statistics
    output = []
    output.append("{q:.4f}\n".format(q=self.Q))
    output.append(self.most_visited_path())
    output.append(
        "move: action Q U P P-Dir N soft-N p-delta p-rel\n")
    row = ("{!s:6}: {: .3f}, {: .3f}, {:.3f}, {:.3f}, {:.3f}, "
           "{:4d} {:.4f} {: .5f} {: .2f}")
    # Slice before formatting so only the rows that are shown get built.
    output.append("\n".join([
        row.format(
            coords.to_kgs(coords.from_flat(key)),
            self.child_action_score[key],
            self.child_Q[key],
            self.child_U[key],
            self.child_prior[key],
            self.original_prior[key],
            int(self.child_N[key]),
            soft_n[key],
            p_delta[key],
            p_rel[key])
        for key in sort_order[:15]]))
    return ''.join(output)
def describe(self):
    """Return a text report of this node's search statistics.

    The report contains the node's Q, the most visited path, a column
    header, and one row for each of the 15 children that rank highest by
    (visit count, action score).

    Returns:
        The report as a single string.
    """
    # Local import: this block did not previously depend on a module-level
    # `np` name.
    import numpy as np

    sort_order = list(range(go.N * go.N + 1))
    sort_order.sort(
        key=lambda i: (self.child_N[i], self.child_action_score[i]),
        reverse=True)

    # A node without visits has sum(child_N) == 0 and a prior entry may be
    # zero; guard both divisions so the report shows 0 instead of nan/inf.
    soft_n = self.child_N / max(1, sum(self.child_N))
    prior = self.child_prior
    p_delta = soft_n - prior
    p_rel = np.divide(p_delta, prior, out=np.zeros_like(p_delta),
                      where=prior != 0)

    # Dump out some statistics
    output = []
    output.append("{q:.4f}\n".format(q=self.Q))
    output.append(self.most_visited_path())
    output.append(
        "move: action Q U P P-Dir N soft-N p-delta p-rel\n")
    row = ("{!s:6}: {: .3f}, {: .3f}, {:.3f}, {:.3f}, {:.3f}, "
           "{:4d} {:.4f} {: .5f} {: .2f}")
    # Slice before formatting so only the rows that are shown get built.
    output.append("\n".join([
        row.format(
            coords.to_kgs(coords.from_flat(key)),
            self.child_action_score[key],
            self.child_Q[key],
            self.child_U[key],
            self.child_prior[key],
            self.original_prior[key],
            int(self.child_N[key]),
            soft_n[key],
            p_delta[key],
            p_rel[key])
        for key in sort_order[:15]]))
    return ''.join(output)
def test_parsing_9x9(self):
    """Check SGF and KGS conversions against known board coordinates."""
    # SGF text -> coordinate, including the empty string for a pass.
    for sgf, expected in (('aa', (0, 0)), ('ac', (2, 0)),
                          ('ca', (0, 2)), ('', None)):
        self.assertEqual(coords.from_sgf(sgf), expected)
    self.assertEqual(coords.to_sgf(None), '')

    # SGF round trips, starting from text and from a coordinate.
    for sgf in ('aa', 'sa'):
        self.assertEqual(sgf, coords.to_sgf(coords.from_sgf(sgf)))
    self.assertEqual((1, 17), coords.from_sgf(coords.to_sgf((1, 17))))

    for kgs, expected in (('A1', (8, 0)), ('A9', (0, 0)),
                          ('C2', (7, 2)), ('J2', (7, 8))):
        self.assertEqual(coords.from_kgs(kgs), expected)

    for coord, expected in (((0, 8), 'J9'), ((8, 0), 'A1')):
        self.assertEqual(coords.to_kgs(coord), expected)
def describe(self):
    """Return a text report of this node's search statistics.

    The report is the node's Q, the most visited path, a column header and
    then one row per child, taken from the 15 best children ordered by
    (visit count, action score) and stopping at the first unvisited one.
    """
    ranked = sorted(
        range(go.N * go.N + 1),
        key=lambda i: (self.child_N[i], self.child_action_score[i]),
        reverse=True)

    total_visits = max(1, sum(self.child_N))  # never divide by zero
    soft_n = self.child_N / total_visits
    prior = self.child_prior
    p_delta = soft_n - prior
    # Relative change against the prior; entries with a zero prior stay 0.
    p_rel = np.divide(p_delta, prior, out=np.zeros_like(p_delta),
                      where=prior != 0)

    # Dump out some statistics
    output = [
        "{q:.4f}\n".format(q=self.Q),
        self.most_visited_path(),
        "move : action Q U P P-Dir N soft-N p-delta p-rel",
    ]
    row = ("\n{!s:4} : {: .3f} {: .3f} {:.3f} {:.3f} {:.3f} "
           "{:5d} {:.4f} {: .5f} {: .2f}")
    for key in ranked[:15]:
        if self.child_N[key] == 0:
            break
        output.append(row.format(
            coords.to_kgs(coords.from_flat(key)),
            self.child_action_score[key],
            self.child_Q[key],
            self.child_U[key],
            self.child_prior[key],
            self.original_prior[key],
            int(self.child_N[key]),
            soft_n[key],
            p_delta[key],
            p_rel[key]))
    return ''.join(output)
def _minigui_report_position(self):
    """Send the root position to dbg() as an ``mg-position:`` JSON line."""
    root = self._player.get_root()
    position = root.position

    def glyph(stone):
        # One character per board point: X black, O white, . anything else.
        if stone == go.BLACK:
            return "X"
        if stone == go.WHITE:
            return "O"
        return "."

    stones = "".join(
        glyph(position.board[row, col])
        for row in range(go.N)
        for col in range(go.N))

    msg = {
        "id": hex(id(root)),
        "toPlay": "B" if position.to_play == 1 else "W",
        "moveNum": position.n,
        "stones": stones,
        "gameOver": position.is_game_over(),
        "caps": position.caps,
    }

    # Parent details are only reported when the root has a grandparent.
    parent = root.parent
    if parent and parent.parent:
        msg["parentId"] = hex(id(parent))
        msg["q"] = float(parent.Q)

    if position.recent:
        msg["move"] = coords.to_kgs(position.recent[-1].move)

    dbg("mg-position:%s" % json.dumps(msg, sort_keys=True))
def _dbg_game_state(self):
    """Send the current game state to dbg() as an ``mg-gamestate:`` JSON line.

    The message holds the board as a string of X/O/. characters in row
    order, the side to play, the last move in KGS notation (None when no
    move has been played), the move number and a Q value.
    """
    position = self._game.position
    msg = {}

    board = []
    for row in range(go.N):
        for col in range(go.N):
            stone = position.board[row, col]
            if stone == go.BLACK:
                board.append("X")
            elif stone == go.WHITE:
                board.append("O")
            else:
                board.append(".")
    msg["board"] = "".join(board)

    msg["toPlay"] = "Black" if position.to_play == 1 else "White"
    if position.recent:
        msg["lastMove"] = coords.to_kgs(position.recent[-1].move)
    else:
        msg["lastMove"] = None
    msg["n"] = position.n

    root = self._game.root
    if root.parent and root.parent.parent:
        # Convert to a builtin float so json.dumps accepts the value even
        # when Q is a non-builtin scalar type.
        msg["q"] = float(root.parent.Q)
    else:
        msg["q"] = 0
    dbg("mg-gamestate:%s", json.dumps(msg, sort_keys=True))
def test_topleft(self):
    """(0, 8) must correspond to 'ia', flat index 8 and 'J9', both ways."""
    point = (0, 8)
    encodings = (
        (coords.from_sgf, coords.to_sgf, 'ia'),
        (coords.from_flat, coords.to_flat, 8),
        (coords.from_kgs, coords.to_kgs, 'J9'),
    )
    for parse, _, raw in encodings:
        self.assertEqual(point, parse(raw))
    for _, render, raw in encodings:
        self.assertEqual(raw, render(point))
def test_pass(self):
    """A pass is None as a coordinate and '' / 81 / 'pass' when encoded."""
    encodings = (
        (coords.from_sgf, coords.to_sgf, ''),
        (coords.from_flat, coords.to_flat, 81),
        (coords.from_kgs, coords.to_kgs, 'pass'),
    )
    for parse, _, raw in encodings:
        self.assertEqual(parse(raw), None)
    for _, render, raw in encodings:
        self.assertEqual(render(None), raw)
def test_topleft(self):
    """(0, 8) must correspond to 'ia', flat index 8 and 'J9', both ways."""
    point = (0, 8)
    encodings = (
        (coords.from_sgf, coords.to_sgf, 'ia'),
        (coords.from_flat, coords.to_flat, 8),
        (coords.from_kgs, coords.to_kgs, 'J9'),
    )
    for parse, _, raw in encodings:
        self.assertEqual(parse(raw), point)
    for _, render, raw in encodings:
        self.assertEqual(render(point), raw)
def test_upperleft(self):
    """(0, 0) must correspond to 'aa', flat index 0 and 'A9', both ways."""
    point = (0, 0)
    encodings = (
        (coords.from_sgf, coords.to_sgf, 'aa'),
        (coords.from_flat, coords.to_flat, 0),
        (coords.from_kgs, coords.to_kgs, 'A9'),
    )
    for parse, _, raw in encodings:
        self.assertEqual(parse(raw), point)
    for _, render, raw in encodings:
        self.assertEqual(render(point), raw)
def mvp_gg(self):
    """Return the most visited path as space-separated KGS coordinates.

    Walks the nodes from most_visited_path_nodes() and stops at the first
    node whose highest child visit count is 1 or less.
    """
    moves = []
    for step in self.most_visited_path_nodes():
        if max(step.child_N) <= 1:
            break
        kgs_move = coords.to_kgs(coords.from_flat(step.fmove))
        moves.append(kgs_move)
    return ' '.join(moves)
def test_pass(self):
    """A pass is None as a coordinate and '' / 81 / 'pass' when encoded."""
    encodings = (
        (coords.from_sgf, coords.to_sgf, ''),
        (coords.from_flat, coords.to_flat, 81),
        (coords.from_kgs, coords.to_kgs, 'pass'),
    )
    for parse, _, raw in encodings:
        self.assertEqual(None, parse(raw))
    for _, render, raw in encodings:
        self.assertEqual(raw, render(None))
def test_upperleft(self):
    """(0, 0) must correspond to 'aa', flat index 0 and 'A9', both ways."""
    point = (0, 0)
    encodings = (
        (coords.from_sgf, coords.to_sgf, 'aa'),
        (coords.from_flat, coords.to_flat, 0),
        (coords.from_kgs, coords.to_kgs, 'A9'),
    )
    for parse, _, raw in encodings:
        self.assertEqual(point, parse(raw))
    for _, render, raw in encodings:
        self.assertEqual(raw, render(point))
def most_visited_path(self):
    """Describe the most visited path below this node as one string.

    Each node from most_visited_path_nodes() contributes
    "<kgs move> (<N>) ==> "; the text ends with the Q of the last node on
    the path, or of this node when the path is empty.
    """
    tail = self
    pieces = []
    for step in self.most_visited_path_nodes():
        kgs_move = coords.to_kgs(coords.from_flat(step.fmove))
        pieces.append("%s (%d) ==> " % (kgs_move, step.N))
        tail = step
    pieces.append("Q: {:.5f}\n".format(tail.Q))
    return ''.join(pieces)
def mvp_gg(self):
    """Return the most visited path as space-separated KGS coordinates.

    Descends into the child with the highest visit count for as long as
    the current node has children and that count is greater than 1.
    """
    current = self
    moves = []
    while current.children and max(current.child_N) > 1:
        current = current.children[np.argmax(current.child_N)]
        kgs_move = coords.to_kgs(coords.from_flat(current.fmove))
        moves.append("%s" % kgs_move)
    return ' '.join(moves)
def test_topleft(self):
    """(0, 8) must match 'ia', flat 8, 'J9' and pygtp (9, 9), both ways."""
    size = utils_test.BOARD_SIZE
    point = (0, 8)
    sized = (
        (coords.from_flat, coords.to_flat, 8),
        (coords.from_kgs, coords.to_kgs, 'J9'),
        (coords.from_pygtp, coords.to_pygtp, (9, 9)),
    )

    # The SGF helpers take no board size; the others do.
    self.assertEqual(coords.from_sgf('ia'), point)
    for parse, _, raw in sized:
        self.assertEqual(parse(size, raw), point)

    self.assertEqual(coords.to_sgf(point), 'ia')
    for _, render, raw in sized:
        self.assertEqual(render(size, point), raw)
def test_upperleft(self):
    """(0, 0) must match 'aa', flat 0, 'A9' and pygtp (1, 9), both ways."""
    size = utils_test.BOARD_SIZE
    point = (0, 0)
    sized = (
        (coords.from_flat, coords.to_flat, 0),
        (coords.from_kgs, coords.to_kgs, 'A9'),
        (coords.from_pygtp, coords.to_pygtp, (1, 9)),
    )

    # The SGF helpers take no board size; the others do.
    self.assertEqual(coords.from_sgf('aa'), point)
    for parse, _, raw in sized:
        self.assertEqual(parse(size, raw), point)

    self.assertEqual(coords.to_sgf(point), 'aa')
    for _, render, raw in sized:
        self.assertEqual(render(size, point), raw)
def mvp_gg(self):
    """Return the most visited path as space-separated KGS coordinates.

    Descends into the child with the highest visit count for as long as
    the current node has children and that count is greater than 1.
    """
    current = self
    moves = []
    while current.children and max(current.child_N) > 1:
        current = current.children[np.argmax(current.child_N)]
        kgs_move = coords.to_kgs(coords.from_flat(current.fmove))
        moves.append("%s" % kgs_move)
    return ' '.join(moves)
def test_topleft(self):
    """(0, 8) must match 'ia', flat 8, 'J9' and pygtp (9, 9), both ways."""
    point = (0, 8)
    encodings = (
        (coords.from_sgf, coords.to_sgf, 'ia'),
        (coords.from_flat, coords.to_flat, 8),
        (coords.from_kgs, coords.to_kgs, 'J9'),
        (coords.from_pygtp, coords.to_pygtp, (9, 9)),
    )
    for parse, _, raw in encodings:
        self.assertEqual(parse(raw), point)
    for _, render, raw in encodings:
        self.assertEqual(render(point), raw)
def test_upperleft(self):
    """(0, 0) must match 'aa', flat 0, 'A9' and pygtp (1, 9), both ways."""
    point = (0, 0)
    encodings = (
        (coords.from_sgf, coords.to_sgf, 'aa'),
        (coords.from_flat, coords.to_flat, 0),
        (coords.from_kgs, coords.to_kgs, 'A9'),
        (coords.from_pygtp, coords.to_pygtp, (1, 9)),
    )
    for parse, _, raw in encodings:
        self.assertEqual(parse(raw), point)
    for _, render, raw in encodings:
        self.assertEqual(render(point), raw)
def test_pass(self):
    """A pass is None as a coordinate and '' / 81 / 'pass' / (0, 0) encoded."""
    size = utils_test.BOARD_SIZE
    sized = (
        (coords.from_flat, coords.to_flat, 81),
        (coords.from_kgs, coords.to_kgs, 'pass'),
        (coords.from_pygtp, coords.to_pygtp, (0, 0)),
    )

    # The SGF helpers take no board size; the others do.
    self.assertEqual(coords.from_sgf(''), None)
    for parse, _, raw in sized:
        self.assertEqual(parse(size, raw), None)

    self.assertEqual(coords.to_sgf(None), '')
    for _, render, raw in sized:
        self.assertEqual(render(size, None), raw)
def test_pass(self):
    """A pass is None as a coordinate and '' / 81 / 'pass' / (0, 0) encoded."""
    encodings = (
        (coords.from_sgf, coords.to_sgf, ''),
        (coords.from_flat, coords.to_flat, 81),
        (coords.from_kgs, coords.to_kgs, 'pass'),
        (coords.from_pygtp, coords.to_pygtp, (0, 0)),
    )
    for parse, _, raw in encodings:
        self.assertEqual(parse(raw), None)
    for _, render, raw in encodings:
        self.assertEqual(render(None), raw)
def test_pass(self):
    """A pass is None as a coordinate and '' / 81 / 'pass' / (0, 0) encoded."""
    size = utils_test.BOARD_SIZE
    sized = (
        (coords.from_flat, coords.to_flat, 81),
        (coords.from_kgs, coords.to_kgs, 'pass'),
        (coords.from_pygtp, coords.to_pygtp, (0, 0)),
    )

    # The SGF helpers take no board size; the others do.
    self.assertEqual(coords.from_sgf(''), None)
    for parse, _, raw in sized:
        self.assertEqual(parse(size, raw), None)

    self.assertEqual(coords.to_sgf(None), '')
    for _, render, raw in sized:
        self.assertEqual(render(size, None), raw)
def play_move(self, c, color=None, mutate=False):
    """Play a move and return the resulting position.

    Obeys CGOS Rules of Play. In short:
      No suicides
      Chinese/area scoring
      Positional superko (this is very crudely approximate at the moment.)

    Args:
        c: coordinate to play at, or None to pass (delegated to
            pass_move()).
        color: colour of the stone to place; defaults to self.to_play.
        mutate: when True this position is updated in place, otherwise a
            deep copy is updated and returned.

    Returns:
        The updated position (self when mutate is True).

    Raises:
        IllegalMove: if self.is_move_legal(c) is false.
    """
    if color is None:
        color = self.to_play

    pos = self if mutate else copy.deepcopy(self)

    if c is None:
        pos = pos.pass_move(mutate=mutate)
        return pos

    # Legality is checked on self, and the message names self.to_play
    # rather than the `color` argument.
    if not self.is_move_legal(c):
        raise IllegalMove('{} move at {} is illegal: \n{}'.format(
            'Black' if self.to_play == BLACK else 'White',
            coords.to_kgs(c), self))

    # Must be computed before the stone is placed: with mutate=True,
    # pos.board and self.board are the same array.
    potential_ko = is_koish(self.board_size, self.board, c)

    place_stones(pos.board, color, [c])
    captured_stones = pos.lib_tracker.add_stone(color, c)
    place_stones(pos.board, EMPTY, captured_stones)

    opp_color = color * -1

    # The delta marks the played point and every captured point with the
    # mover's colour.
    new_board_delta = np.zeros([self.board_size, self.board_size],
                               dtype=np.int8)
    new_board_delta[c] = color
    place_stones(new_board_delta, color, captured_stones)

    # A ko point is recorded only for a single-stone capture at a point
    # that is_koish() reported for the opponent colour.
    if len(captured_stones) == 1 and potential_ko == opp_color:
        new_ko = list(captured_stones)[0]
    else:
        new_ko = None

    # Captures are credited by pos.to_play, not by `color`.
    # NOTE(review): these differ when an out-of-turn colour is passed; confirm intended.
    if pos.to_play == BLACK:
        new_caps = (pos.caps[0] + len(captured_stones), pos.caps[1])
    else:
        new_caps = (pos.caps[0], pos.caps[1] + len(captured_stones))

    pos.n += 1
    pos.caps = new_caps
    pos.ko = new_ko
    pos.recent += (PlayerMove(color, c), )

    # keep a rolling history of last 7 deltas - that's all we'll need to
    # extract the last 8 board states.
    pos.board_deltas = np.concatenate(
        (new_board_delta.reshape(1, self.board_size, self.board_size),
         pos.board_deltas[:6]))
    pos.to_play *= -1
    return pos
def _minigui_report_search_status(self, leaves):
    """Report the current MCTS search status through dbg().

    Sends an ``mg-search:`` JSON line with the search path to the first
    leaf, the root's child_Q relative to the root Q (scaled by 100 and
    rounded), the root's child_N and, only when it changed since the last
    report, the most visited path. The format is meant to be parsed by one
    of the STDERR_HANDLERS in minigui.ts.

    Args:
      leaves: list of leaf MCTSNodes returned by tree_search().
    """
    root = self._player.get_root()

    msg = {
        "moveNum": root.position.n,
        "toPlay": "B" if root.position.to_play == go.BLACK else "W",
    }

    # Climb from the first leaf to the root, then flip to root-first order.
    climb = []
    if leaves:
        cursor = leaves[0]
        while cursor != root:
            climb.append(cursor.fmove)
            cursor = cursor.parent
        climb.reverse()
    msg["search"] = [coords.to_kgs(coords.from_flat(m)) for m in climb]

    delta_q = root.child_Q - root.Q
    msg["dq"] = [int(round(value * 100)) for value in delta_q]
    msg["n"] = [int(count) for count in root.child_N]

    pv = [coords.to_kgs(coords.from_flat(node.fmove))
          for node in root.most_visited_path_nodes()]
    if pv != self._last_pv:
        msg["pv"] = pv
        self._last_pv = pv

    dbg("mg-search:%s" % json.dumps(msg, sort_keys=True))
def play_move(self, c, color=None, mutate=False):
    """Play a move and return the resulting position.

    Obeys CGOS Rules of Play. In short:
      No suicides
      Chinese/area scoring
      Positional superko (this is very crudely approximate at the moment.)

    Args:
        c: coordinate to play at, or None to pass (delegated to
            pass_move()).
        color: colour of the stone to place; defaults to self.to_play.
        mutate: when True this position is updated in place, otherwise a
            deep copy is updated and returned.

    Returns:
        The updated position (self when mutate is True).

    Raises:
        IllegalMove: if self.is_move_legal(c) is false.
    """
    if color is None:
        color = self.to_play

    pos = self if mutate else copy.deepcopy(self)

    if c is None:
        pos = pos.pass_move(mutate=mutate)
        return pos

    # Legality is checked on self, and the message names self.to_play
    # rather than the `color` argument.
    if not self.is_move_legal(c):
        raise IllegalMove("{} move at {} is illegal: \n{}".format(
            "Black" if self.to_play == BLACK else "White",
            coords.to_kgs(c), self))

    # Must be computed before the stone is placed: with mutate=True,
    # pos.board and self.board are the same array.
    potential_ko = is_koish(self.board, c)

    place_stones(pos.board, color, [c])
    captured_stones = pos.lib_tracker.add_stone(color, c)
    place_stones(pos.board, EMPTY, captured_stones)

    opp_color = color * -1

    # The delta marks the played point and every captured point with the
    # mover's colour.
    new_board_delta = np.zeros([N, N], dtype=np.int8)
    new_board_delta[c] = color
    place_stones(new_board_delta, color, captured_stones)

    # A ko point is recorded only for a single-stone capture at a point
    # that is_koish() reported for the opponent colour.
    if len(captured_stones) == 1 and potential_ko == opp_color:
        new_ko = list(captured_stones)[0]
    else:
        new_ko = None

    # Captures are credited by pos.to_play, not by `color`.
    # NOTE(review): these differ when an out-of-turn colour is passed; confirm intended.
    if pos.to_play == BLACK:
        new_caps = (pos.caps[0] + len(captured_stones), pos.caps[1])
    else:
        new_caps = (pos.caps[0], pos.caps[1] + len(captured_stones))

    pos.n += 1
    pos.caps = new_caps
    pos.ko = new_ko
    pos.recent += (PlayerMove(color, c),)

    # keep a rolling history of last 7 deltas - that's all we'll need to
    # extract the last 8 board states.
    pos.board_deltas = np.concatenate((
        new_board_delta.reshape(1, N, N),
        pos.board_deltas[:6]))
    pos.to_play *= -1
    return pos
def test_upperleft(self):
    """(0, 0) must match 'aa', flat 0, 'A9' and pygtp (1, 9), both ways."""
    size = utils_test.BOARD_SIZE
    point = (0, 0)
    sized = (
        (coords.from_flat, coords.to_flat, 0),
        (coords.from_kgs, coords.to_kgs, 'A9'),
        (coords.from_pygtp, coords.to_pygtp, (1, 9)),
    )

    # The SGF helpers take no board size; the others do.
    self.assertEqual(coords.from_sgf('aa'), point)
    for parse, _, raw in sized:
        self.assertEqual(parse(size, raw), point)

    self.assertEqual(coords.to_sgf(point), 'aa')
    for _, render, raw in sized:
        self.assertEqual(render(size, point), raw)
def test_topleft(self):
    """(0, 8) must match 'ia', flat 8, 'J9' and pygtp (9, 9), both ways."""
    size = utils_test.BOARD_SIZE
    point = (0, 8)
    sized = (
        (coords.from_flat, coords.to_flat, 8),
        (coords.from_kgs, coords.to_kgs, 'J9'),
        (coords.from_pygtp, coords.to_pygtp, (9, 9)),
    )

    # The SGF helpers take no board size; the others do.
    self.assertEqual(coords.from_sgf('ia'), point)
    for parse, _, raw in sized:
        self.assertEqual(parse(size, raw), point)

    self.assertEqual(coords.to_sgf(point), 'ia')
    for _, render, raw in sized:
        self.assertEqual(render(size, point), raw)
def most_visited_path(self):
    """Describe the most visited path below this node as one string.

    Starting at this node, repeatedly follows the child with the highest
    visit count, adding "<kgs move> (<N>) ==> " for each node reached. If
    the chosen child is missing from `children`, "GAME END" is added and
    the walk stops. The text ends with the Q of the last node reached.

    Returns:
        The path description, terminated by a newline.
    """
    node = self
    output = []
    while node.children:
        next_kid = np.argmax(node.child_N)
        child = node.children.get(next_kid)
        if child is None:
            # Keep `node` on the last real node so the Q lookup below
            # does not dereference None.
            output.append("GAME END")
            break
        node = child
        output.append("%s (%d) ==> " % (
            coords.to_kgs(coords.from_flat(node.fmove)), node.N))
    output.append("Q: {:.5f}\n".format(node.Q))
    return ''.join(output)
def most_visited_path(self):
    """Describe the most visited path below this node as one string.

    Starting at this node, repeatedly follows the child with the highest
    visit count, adding "<kgs move> (<N>) ==> " for each node reached. If
    the chosen child is missing from `children`, "GAME END" is added and
    the walk stops. The text ends with the Q of the last node reached.

    Returns:
        The path description, terminated by a newline.
    """
    node = self
    output = []
    while node.children:
        next_kid = np.argmax(node.child_N)
        child = node.children.get(next_kid)
        if child is None:
            # Keep `node` on the last real node so the Q lookup below
            # does not dereference None.
            output.append("GAME END")
            break
        node = child
        output.append("%s (%d) ==> " % (
            coords.to_kgs(coords.from_flat(node.fmove)), node.N))
    output.append("Q: {:.5f}\n".format(node.Q))
    return ''.join(output)
def most_visited_path(self):
    """Describe the most visited path below this node as one string.

    Starting at this node, repeatedly follows the child with the highest
    visit count, adding "<kgs move> (<N>) ==> " for each node reached. If
    the chosen child is missing from `children`, "GAME END" is added and
    the walk stops. The text ends with the Q of the last node reached.

    Returns:
        The path description, terminated by a newline.
    """
    node = self
    output = []
    while node.children:
        next_kid = np.argmax(node.child_N)
        child = node.children.get(next_kid)
        if child is None:
            # Keep `node` on the last real node so the Q lookup below
            # does not dereference None.
            output.append('GAME END')
            break
        node = child
        output.append('{} ({}) ==> '.format(
            coords.to_kgs(
                self.board_size,
                coords.from_flat(self.board_size, node.fmove)),
            node.N))
    output.append('Q: {:.5f}\n'.format(node.Q))
    return ''.join(output)
def most_visited_path(self):
    """Describe the most visited path below this node as one string.

    Starting at this node, repeatedly follows the child with the highest
    visit count, adding "<kgs move> (<N>) ==> " for each node reached. If
    the chosen child is missing from `children`, "GAME END" is added and
    the walk stops. The text ends with the Q of the last node reached.

    Returns:
        The path description, terminated by a newline.
    """
    node = self
    output = []
    while node.children:
        next_kid = np.argmax(node.child_N)
        child = node.children.get(next_kid)
        if child is None:
            # Keep `node` on the last real node so the Q lookup below
            # does not dereference None.
            output.append('GAME END')
            break
        node = child
        output.append('{} ({}) ==> '.format(
            coords.to_kgs(
                self.board_size,
                coords.from_flat(self.board_size, node.fmove)),
            node.N))
    output.append('Q: {:.5f}\n'.format(node.Q))
    return ''.join(output)
def extract_move_data(root_node, worker_id, completed_time, board_size):
    """Collect one record per move from a chain of SGF nodes.

    Follows the `.next` links starting at `root_node.next`. For every node
    the played move is read from its 'B' or 'W' property and the search
    statistics are parsed from its 'C' (comment) property with
    parse_comment_node().

    Args:
        root_node: node whose `.next` chain holds the moves.
        worker_id: copied into every record.
        completed_time: copied into every record.
        board_size: side length of the board; each per-move statistics
            list has board_size * board_size + 1 entries.

    Returns:
        A list of dicts, one per move, in game order.

    Raises:
        ValueError: if a node has neither a 'B' nor a 'W' property.
    """
    move_data = []
    move_num = 1
    current_node = root_node.next
    while current_node is not None:
        props = current_node.properties
        if 'B' in props:
            to_play = 1
            move_played = props['B'][0]
        elif 'W' in props:
            to_play = -1
            move_played = props['W'][0]
        else:
            # Fail loudly instead of dropping into a debugger and then
            # continuing with an unbound or stale move.
            raise ValueError(
                'SGF node for move {} has neither a B nor a W property'
                .format(move_num))
        move_played = coords.to_flat(coords.from_sgf(move_played))

        post_Q, debug_rows = parse_comment_node(props['C'][0])

        # Four independent lists indexed by flat move; moves without a
        # debug row keep the value 0.
        num_moves = board_size * board_size + 1
        policy_prior = [0] * num_moves
        policy_prior_orig = [0] * num_moves
        mcts_visit_counts = [0] * num_moves
        mcts_visit_counts_norm = [0] * num_moves
        for debug_row in debug_rows:
            move = debug_row.move
            policy_prior[move] = debug_row.prior
            policy_prior_orig[move] = debug_row.orig_prior
            mcts_visit_counts[move] = debug_row.N
            mcts_visit_counts_norm[move] = debug_row.soft_N

        move_data.append({
            'worker_id': worker_id,
            'completed_time': completed_time,
            'move_num': move_num,
            'turn_to_play': to_play,
            'move': move_played,
            'move_kgs': coords.to_kgs(coords.from_flat(move_played)),
            'prior_Q': None,
            'post_Q': post_Q,
            'policy_prior': policy_prior,
            'policy_prior_orig': policy_prior_orig,
            'mcts_visit_counts': mcts_visit_counts,
            'mcts_visit_counts_norm': mcts_visit_counts_norm,
        })
        move_num += 1
        current_node = current_node.next
    return move_data
def cmd_genmove(self, color=None):
    """Choose and play the engine's next move.

    Returns "pass" without searching when courtesy passing is enabled and
    the previous move was a pass, "resign" when the player decides to
    resign, and otherwise the played move in KGS notation.
    """
    if color is not None:
        self._accomodate_out_of_turn(color)

    player = self._player

    if self._courtesy_pass:
        # If courtesy pass is True and the previous move was a pass, we'll
        # pass too, regardless of score or our opinion on the game.
        history = player.get_position().recent
        if history and history[-1].move is None:
            return "pass"

    move = player.suggest_move(player.get_position())

    if player.should_resign():
        loser = player.get_position().to_play
        player.set_result(-1 * loser, was_resign=True)
        return "resign"

    player.play_move(move)
    if player.get_root().is_done():
        final_result = player.get_position().result()
        player.set_result(final_result, was_resign=False)
    return coords.to_kgs(move)
def cmd_gamestate(self):
    """Send the current game state to dbg() as an ``mg-gamestate:`` JSON line.

    The message holds the board as a string of X/O/. characters in row
    order, the last move in KGS notation (None when no move has been
    played), the side to play, the move number, a Q value and whether the
    game is over.
    """
    position = self._player.get_position()
    root = self._player.get_root()
    msg = {}

    board = []
    for row in range(go.N):
        for col in range(go.N):
            stone = position.board[row, col]
            if stone == go.BLACK:
                board.append("X")
            elif stone == go.WHITE:
                board.append("O")
            else:
                board.append(".")
    msg["board"] = "".join(board)

    if position.recent:
        msg["lastMove"] = coords.to_kgs(position.recent[-1].move)
    else:
        msg["lastMove"] = None
    msg["toPlay"] = "B" if position.to_play == 1 else "W"
    msg["moveNum"] = position.n

    if root.parent and root.parent.parent:
        # Convert to a builtin float so json.dumps accepts the value even
        # when Q is a non-builtin scalar type.
        msg["q"] = float(root.parent.Q)
    else:
        msg["q"] = 0
    msg["gameOver"] = position.is_game_over()
    dbg("mg-gamestate:%s" % json.dumps(msg, sort_keys=True))
def play(board_size, network, readouts, resign_threshold,
         simultaneous_leaves, verbosity=0):
    """Plays out a self-play match.

    Args:
      board_size: the go board size
      network: the DualNet model
      readouts: the number of readouts in MCTS
      resign_threshold: the threshold to resign at in the match
      simultaneous_leaves: the number of simultaneous leaves in MCTS
      verbosity: the verbosity of the self-play match

    Returns:
      The MCTSPlayer that played the game. Per the original notes it gives
      access to the final position, the n x 362 tensor of floats
      representing the mcts search probabilities and the n-ary tensor of
      floats representing the original value-net estimate, where n is the
      number of moves in the game.
    """
    player = MCTSPlayer(board_size, network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=simultaneous_leaves)
    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(),
                              was_resign=False)
            break

        if (verbosity >= 2) or (
                verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            # The format string must stay on one physical line; a quoted
            # literal cannot span a line break.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur))
        if verbosity >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player
def fmt(move):
    """Render a move as '<b|w>-<KGS coordinate>'; colour 1 maps to 'b'."""
    side = 'b' if move.color == 1 else 'w'
    kgs_move = coords.to_kgs(self.board_size, move.move)
    return '{}-{}'.format(side, kgs_move)
def fmt(move):
    """Render a move as '<b|w>-<KGS coordinate>'; colour 1 maps to 'b'."""
    side = 'b' if move.color == 1 else 'w'
    return "{}-{}".format(side, coords.to_kgs(move.move))
# Space-separated rendering of the moves in pos.recent[-diff:].
path = " ".join(map(fmt, pos.recent[-diff:]))
def heatmap(self, sort_order, node, prop):
    """List one attribute of `node` for its visited children.

    For every key in `sort_order` whose visit count is positive, builds a
    line "<kgs move> <value>", where the value is entry `key` of the node
    attribute named `prop`. At most the first 20 lines are returned,
    joined by newlines.
    """
    rows = []
    for key in sort_order:
        if node.child_N[key] > 0:
            kgs_move = coords.to_kgs(coords.from_flat(key))
            rows.append(
                "{!s:6} {}".format(kgs_move, node.__dict__.get(prop)[key]))
    return "\n".join(rows[:20])
def fmt(move):
    """Render a move as '<b|w>-<KGS coordinate>'; colour 1 maps to 'b'."""
    side = 'b' if move.color == 1 else 'w'
    return "{}-{}".format(side, coords.to_kgs(move.move))
# Space-separated rendering of the moves in pos.recent[-diff:].
path = " ".join(map(fmt, pos.recent[-diff:]))
def _heatmap(self, sort_order, node, prop): return "\n".join([ "{!s:6} {}".format(coords.to_kgs(coords.from_flat(key)), node.__dict__.get(prop)[key]) for key in sort_order if node.child_N[key] > 0 ][:20])
def play(network):
    '''Play one self-play game and return the MCTSPlayer that played it.

    Per the original notes, the returned player contains:
    - the final position
    - the n x 362 tensor of floats representing the mcts search probabilities
    - the n-ary tensor of floats representing the original value-net estimate
    where n is the number of moves in the game.
    '''
    readouts = FLAGS.num_readouts  # defined in strategies.py

    # Resignation is disabled for a random fraction of games
    # (FLAGS.resign_disable_pct).
    resign_threshold = (
        -1.0 if random.random() < FLAGS.resign_disable_pct else None)

    player = MCTSPlayer(network, resign_threshold=resign_threshold)
    player.initialize_game()

    # Must run this once at the start to expand the root node.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        started = time.time()
        player.root.inject_noise()

        # we want to do "X additional readouts", rather than "up to X readouts".
        target = player.root.N + readouts
        while player.root.N < target:
            player.tree_search()

        if FLAGS.verbose >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break

        player.play_move(player.pick_move())
        if player.root.is_done():
            player.set_result(player.root.position.result(),
                              was_resign=False)
            break

        # Progress line: always at verbosity 2+, every tenth move at 1.
        if (FLAGS.verbose >= 2) or (
                FLAGS.verbose >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            elapsed = time.time() - started
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                elapsed / readouts * 100.0, elapsed), flush=True)
        if FLAGS.verbose >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if FLAGS.verbose >= 2:
        utils.dbg("%s: %.3f" % (player.result_string, player.root.Q))
        utils.dbg(player.root.position, player.root.position.score())

    return player
def play(network, readouts, resign_threshold, verbosity=0):
    '''Play one self-play game and return the MCTSPlayer that played it.

    Per the original notes, the returned player gives access to:
    - the final position
    - the n x 362 tensor of floats representing the mcts search probabilities
    - the n-ary tensor of floats representing the original value-net estimate
    where n is the number of moves in the game.

    Args:
        network: model used to evaluate positions via network.run().
        readouts: number of additional tree-search readouts per move.
        resign_threshold: passed to MCTSPlayer; overridden with -1.0 in
            5% of games to disable resignation.
        verbosity: 0 is silent; higher values print more progress output.
    '''
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)
    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(),
                              was_resign=False)
            break

        if (verbosity >= 2) or (
                verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player
def play_move(self, c, color=None, mutate=False):
    """Play a move and return the resulting position.

    Obeys CGOS Rules of Play. In short:
      No suicides
      Chinese/area scoring
      Positional superko (this is very crudely approximate at the moment.)

    Args:
      c: the coordinate to play at, or None to pass (delegated to
        pass_move()).
      color: the color of the stone to place; defaults to self.to_play.
      mutate: when True this position is updated in place, otherwise a
        deep copy is updated and returned.

    Returns:
      The updated position (self when mutate is True).

    Raises:
      IllegalMove: if self.is_move_legal(c) is false.
    """
    if color is None:
        color = self.to_play

    pos = self if mutate else copy.deepcopy(self)

    if c is None:
        pos = pos.pass_move(mutate=mutate)
        return pos

    # Legality is checked on self, and the message names self.to_play
    # rather than the `color` argument.
    if not self.is_move_legal(c):
        raise IllegalMove('{} move at {} is illegal: \n{}'.format(
            'Black' if self.to_play == BLACK else 'White',
            coords.to_kgs(self.board_size, c), self))

    # Must be computed before the stone is placed: with mutate=True,
    # pos.board and self.board are the same array.
    potential_ko = is_koish(self.board_size, self.board, c)

    place_stones(pos.board, color, [c])
    captured_stones = pos.lib_tracker.add_stone(color, c)
    place_stones(pos.board, EMPTY, captured_stones)

    opp_color = -1 * color

    # The delta marks the played point and every captured point with the
    # mover's colour.
    new_board_delta = np.zeros([self.board_size, self.board_size],
                               dtype=np.int8)
    new_board_delta[c] = color
    place_stones(new_board_delta, color, captured_stones)

    # A ko point is recorded only for a single-stone capture at a point
    # that is_koish() reported for the opponent colour.
    if len(captured_stones) == 1 and potential_ko == opp_color:
        new_ko = list(captured_stones)[0]
    else:
        new_ko = None

    # Captures are credited by pos.to_play, not by `color`.
    # NOTE(review): these differ when an out-of-turn colour is passed; confirm intended.
    if pos.to_play == BLACK:
        new_caps = (pos.caps[0] + len(captured_stones), pos.caps[1])
    else:
        new_caps = (pos.caps[0], pos.caps[1] + len(captured_stones))

    pos.n += 1
    pos.caps = new_caps
    pos.ko = new_ko
    pos.recent += (PlayerMove(color, c),)

    # keep a rolling history of last 7 deltas - that's all we'll need to
    # extract the last 8 board states.
    pos.board_deltas = np.concatenate((
        new_board_delta.reshape(1, self.board_size, self.board_size),
        pos.board_deltas[:6]))
    pos.to_play *= -1
    return pos
def fmt(move):
    """Render a move as '<b|w>-<KGS coordinate>'; go.BLACK maps to 'b'."""
    side = 'b' if move.color == go.BLACK else 'w'
    kgs_move = coords.to_kgs(move.move)
    return "{}-{}".format(side, kgs_move)
def fmt(move):
    """Render a move as '<b|w>-<KGS coordinate>'; colour 1 maps to 'b'."""
    side = 'b' if move.color == 1 else 'w'
    kgs_move = coords.to_kgs(self.board_size, move.move)
    return '{}-{}'.format(side, kgs_move)