def _minigui_report_search_status(self, leaves):
    """Prints the current MCTS search status to stderr.

    Reports the current search path, root node's child_Q, root node's
    child_N, the most visited path in a format that can be parsed by one of
    the STDERR_HANDLERS in minigui.ts.

    Args:
      leaves: list of leaf MCTSNodes returned by tree_search().
    """
    root = self._player.get_root()

    msg = {
        "id": hex(id(root)),
        "n": int(root.N),
        "q": float(root.Q),
        # Child Q values are sent as integers scaled by 1000.
        "childQ": [int(round(q * 1000)) for q in root.child_Q],
        "childN": [int(n) for n in root.child_N],
    }

    variations = {}
    for i in root.rank_children()[:15]:
        # Stop at the first child that is unvisited or not yet expanded.
        if root.child_N[i] == 0 or i not in root.children:
            break
        c = coords.to_gtp(coords.from_flat(i))
        nodes = root.children[i].most_visited_path_nodes()
        moves = [coords.to_gtp(coords.from_flat(m.fmove)) for m in nodes]
        variations[c] = {
            "n": int(root.child_N[i]),
            "q": float(root.child_Q[i]),
            "moves": [c] + moves,
        }

    if leaves:
        # Walk from the first leaf up to the root, collecting the moves
        # played along the way (leaf-first, reversed below).
        path = []
        leaf = leaves[0]
        while leaf != root:
            path.append(leaf.fmove)
            leaf = leaf.parent
        if path:
            path.reverse()
            variations["live"] = {
                "n": int(root.child_N[path[0]]),
                "q": float(root.child_Q[path[0]]),
                "moves": [coords.to_gtp(coords.from_flat(m)) for m in path]
            }

    if variations:
        msg["variations"] = variations

    dbg("mg-update:%s" % json.dumps(msg, sort_keys=True))
def main(argv):
    """Replays the SGF file named by argv[1], printing each position and move.

    Args:
      argv: command-line arguments; argv[1] is the path of the SGF file,
        either a local path or a gs:// path.
    """
    path = argv[1]
    # It takes a couple of seconds to import anything from tensorflow, so only
    # do it if we need to read from GCS.
    if path.startswith('gs://'):
        from tensorflow import gfile
        f = gfile.GFile(path, 'r')
    else:
        f = open(path, 'r')
    # Use the handle as a context manager so it is closed even if read()
    # raises.
    with f:
        contents = f.read()

    # Determine the board size before importing any Minigo libraries because
    # they require that the BOARD_SIZE environment variable is set correctly
    # before import.
    m = re.search(r'SZ\[([^]]+)', contents)
    if not m:
        print('Couldn\'t find SZ node, assuming 19x19 board')
        board_size = 19
    else:
        board_size = int(m.group(1))

    # Set the board size and import the Minigo libs.
    os.environ['BOARD_SIZE'] = str(board_size)
    import coords
    import go
    import sgf_wrapper

    # Replay the game.
    for x in sgf_wrapper.replay_sgf(contents):
        to_play = 'B' if x.position.to_play == 1 else 'W'
        print('{}>> {}: {}\n'.format(x.position, to_play,
                                     coords.to_gtp(x.next_move)))
def print_example(examples, i):
    """Prints example `i`: a summary header, then the board next to its pi grid.

    Args:
      examples: sequence of examples; each is read for `value`, `n`, `q`,
        `c` and `pi`, and passed to parse_board().
      i: index into `examples` of the example to print.
    """
    example = examples[i]
    position = parse_board(example)

    to_play = 'Black' if position.to_play == 1 else 'White'
    winner = 'Black' if example.value > 0 else 'White'
    print('\nExample %d of %d, %s to play, winner is %s' % (
        i + 1, len(examples), to_play, winner))

    # n == -1 marks an example without search statistics to report.
    if example.n != -1:
        picked_move = coords.to_gtp(coords.from_flat(example.c))
        print('N:%d Q:%.3f picked:%s' % (example.n, example.q, picked_move))

    # The last two lines of the position's string form are dropped.
    board_lines = str(position).split('\n')[:-2]

    mean = np.mean(example.pi[example.pi > 0])
    mx = np.max(example.pi)

    pi_lines = ['PI']
    for row in range(go.N):
        cells = []
        for col in range(go.N):
            idx = row * go.N + col
            picked = example.c == idx if example.c != -1 else False
            cells.append(format_pi(
                example.pi[idx], position.board[row, col], mean, mx, picked))
        pi_lines.append(' '.join(cells))

    # The final pi entry (index N * N) is formatted on its own line.
    pi_lines.append(format_pi(
        example.pi[-1], go.EMPTY, mean, mx, example.c == go.N * go.N))

    for board_line, pi_line in zip(board_lines, pi_lines):
        print('%s | %s' % (board_line, pi_line))
def play_move(self, c, color=None, mutate=False):
    """Plays a stone at `c` (or passes when `c` is None) and returns the result.

    Args:
      c: the point to play, used to index the board; None means pass.
      color: color of the stone to place; defaults to `self.to_play`.
      mutate: if True, `self` is updated in place; otherwise the update is
        applied to a deep copy of `self`.

    Returns:
      The updated position: `self` when `mutate` is True, otherwise a copy.
      For a pass, whatever `pass_move(mutate=mutate)` returns.

    Raises:
      IllegalMove: if `self.is_move_legal(c)` is false.
    """
    # Obeys CGOS Rules of Play. In short:
    # No suicides
    # Chinese/area scoring
    # Positional superko (this is very crudely approximate at the moment.)
    if color is None:
        color = self.to_play

    pos = self if mutate else copy.deepcopy(self)

    if c is None:
        pos = pos.pass_move(mutate=mutate)
        return pos

    # NOTE(review): legality is checked on `self`, and the message names the
    # side to move (`self.to_play`) rather than `color` — confirm that is
    # intended when `color` is passed explicitly.
    if not self.is_move_legal(c):
        raise IllegalMove("{} move at {} is illegal: \n{}".format(
            "Black" if self.to_play == BLACK else "White",
            coords.to_gtp(c), self))

    # Evaluated before the stone is placed on the board.
    potential_ko = is_koish(self.board, c)

    place_stones(pos.board, color, [c])
    captured_stones = pos.lib_tracker.add_stone(color, c)
    place_stones(pos.board, EMPTY, captured_stones)

    opp_color = color * -1

    # The delta records `color` at the played point and at every captured
    # point.
    new_board_delta = np.zeros([N, N], dtype=np.int8)
    new_board_delta[c] = color
    place_stones(new_board_delta, color, captured_stones)

    # A ko point is recorded only when exactly one stone was captured and
    # is_koish() reported the opponent's color for `c`.
    if len(captured_stones) == 1 and potential_ko == opp_color:
        new_ko = list(captured_stones)[0]
    else:
        new_ko = None

    # NOTE(review): captures are credited according to `pos.to_play`, not
    # `color` — confirm for out-of-turn moves.
    if pos.to_play == BLACK:
        new_caps = (pos.caps[0] + len(captured_stones), pos.caps[1])
    else:
        new_caps = (pos.caps[0], pos.caps[1] + len(captured_stones))

    pos.n += 1
    pos.caps = new_caps
    pos.ko = new_ko
    pos.recent += (PlayerMove(color, c), )

    # keep a rolling history of last 7 deltas - that's all we'll need to
    # extract the last 8 board states.
    pos.board_deltas = np.concatenate(
        (new_board_delta.reshape(1, N, N), pos.board_deltas[:6]))
    pos.to_play *= -1
    return pos
def describe(self):
    """Returns a text summary of this node's search statistics.

    The summary holds the node's Q, the most visited path, and one table row
    for each of the first 15 ranked children, stopping early at the first
    child with no visits.
    """
    ranked_children = self.rank_children()
    total_visits = max(1, sum(self.child_N))
    soft_n = self.child_N / total_visits
    prior = self.child_prior
    p_delta = soft_n - prior
    # Relative change against the prior; stays 0 wherever the prior is 0.
    p_rel = np.divide(
        p_delta, prior, out=np.zeros_like(p_delta), where=prior != 0)

    row_format = "\n{!s:4} : {: .3f} {: .3f} {:.3f} {:.3f} {:.3f} {:5d} {:.4f} {: .5f} {: .2f}"

    # Dump out some statistics
    lines = [
        "{q:.4f}\n".format(q=self.Q),
        self.most_visited_path(),
        "move : action Q U P P-Dir N soft-N p-delta p-rel",
    ]
    for child in ranked_children[:15]:
        if self.child_N[child] == 0:
            break
        lines.append(row_format.format(
            coords.to_gtp(coords.from_flat(child)),
            self.child_action_score[child],
            self.child_Q[child],
            self.child_U[child],
            self.child_prior[child],
            self.original_prior[child],
            int(self.child_N[child]),
            soft_n[child],
            p_delta[child],
            p_rel[child]))
    return ''.join(lines)
def describe(self):
    """Returns a text summary of this node's search statistics.

    The summary holds the node's Q, the most visited path, and one table row
    for each of the 15 best children ordered by (visit count, action score)
    descending, stopping early at the first child with no visits.
    """
    # Read these once: the sort key would otherwise look them up on `self`
    # for every one of the N * N + 1 candidate moves.
    child_n = self.child_N
    action_score = self.child_action_score
    sort_order = sorted(
        range(go.N * go.N + 1),
        key=lambda i: (child_n[i], action_score[i]),
        reverse=True)

    soft_n = child_n / max(1, sum(child_n))
    prior = self.child_prior
    p_delta = soft_n - prior
    # Relative change against the prior; stays 0 wherever the prior is 0.
    p_rel = np.divide(
        p_delta, prior, out=np.zeros_like(p_delta), where=prior != 0)

    # Dump out some statistics
    output = []
    # Format Q as a float. Applying a ".4" precision to str(Q) would merely
    # truncate the text to four characters (e.g. "-0.1" or "1e-0").
    output.append("{q:.4f}\n".format(q=self.Q))
    output.append(self.most_visited_path())
    output.append(
        "move : action Q U P P-Dir N soft-N p-delta p-rel")
    for key in sort_order[:15]:
        if child_n[key] == 0:
            break
        output.append("\n{!s:4} : {: .3f} {: .3f} {:.3f} {:.3f} {:.3f} {:5d} {:.4f} {: .5f} {: .2f}".format(
            coords.to_gtp(coords.from_flat(key)),
            action_score[key],
            self.child_Q[key],
            self.child_U[key],
            self.child_prior[key],
            self.original_prior[key],
            int(child_n[key]),
            soft_n[key],
            p_delta[key],
            p_rel[key]))
    return ''.join(output)
def _minigui_report_position(self):
    """Sends the root position through dbg() as an "mg-position:" JSON line.

    The message carries the node id, side to play, move number, the stones
    as one character per point ("X" black, "O" white, "." otherwise), the
    game-over flag and capture counts. The parent id and Q are included when
    the root has a grandparent, and the last move when any move was played.
    """
    root = self._player.get_root()
    position = root.position

    def stone_char(stone):
        if stone == go.BLACK:
            return "X"
        if stone == go.WHITE:
            return "O"
        return "."

    # Row-major scan of the board.
    stones = "".join(
        stone_char(position.board[row, col])
        for row in range(go.N)
        for col in range(go.N))

    msg = {
        "id": hex(id(root)),
        "toPlay": "B" if position.to_play == 1 else "W",
        "moveNum": position.n,
        "stones": stones,
        "gameOver": position.is_game_over(),
        "caps": position.caps,
    }

    parent = root.parent
    if parent and parent.parent:
        msg["parentId"] = hex(id(parent))
        msg["q"] = float(parent.Q)

    if position.recent:
        msg["move"] = coords.to_gtp(position.recent[-1].move)

    dbg("mg-position:%s" % json.dumps(msg, sort_keys=True))
def test_parsing_9x9(self):
    """Spot-checks SGF and GTP coordinate conversions in both directions."""
    # SGF strings to points; the empty string decodes to None.
    sgf_cases = (
        ('aa', (0, 0)),
        ('ac', (2, 0)),
        ('ca', (0, 2)),
        ('', None),
    )
    for sgf, expected in sgf_cases:
        self.assertEqual(expected, coords.from_sgf(sgf))

    # None encodes to the empty SGF string.
    self.assertEqual('', coords.to_sgf(None))

    # Round trips, starting from the string form and from the point form.
    for sgf in ('aa', 'sa'):
        self.assertEqual(sgf, coords.to_sgf(coords.from_sgf(sgf)))
    self.assertEqual((1, 17), coords.from_sgf(coords.to_sgf((1, 17))))

    # GTP strings to points.
    gtp_cases = (
        ('A1', (8, 0)),
        ('A9', (0, 0)),
        ('C2', (7, 2)),
        ('J2', (7, 8)),
    )
    for gtp, expected in gtp_cases:
        self.assertEqual(expected, coords.from_gtp(gtp))

    # Points to GTP strings.
    for point, expected in (((0, 8), 'J9'), ((8, 0), 'A1')):
        self.assertEqual(expected, coords.to_gtp(point))
def play(network):
    """Runs one self-play game and returns the MCTSPlayer that played it.

    According to the original contract, the returned player contains:
      - the final position
      - the n x 362 tensor of floats representing the mcts search
        probabilities
      - the n-ary tensor of floats representing the original value-net
        estimate
    where n is the number of moves in the game.

    Args:
      network: the network used by the player; its run(position) method must
        return a (probabilities, value) pair.
    """
    num_readouts = FLAGS.num_readouts  # defined in strategies.py

    # Resignation is disabled (threshold -1.0) for a random fraction
    # FLAGS.resign_disable_pct of games.
    resign_threshold = (
        -1.0 if random.random() < FLAGS.resign_disable_pct else None)

    player = MCTSPlayer(network, resign_threshold=resign_threshold)
    player.initialize_game()

    # Must run this once at the start to expand the root node.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        move_start = time.time()
        player.root.inject_noise()

        # we want to do "X additional readouts", rather than "up to X
        # readouts", so the target is relative to the current count.
        readout_target = player.root.N + num_readouts
        while player.root.N < readout_target:
            player.tree_search()

        if FLAGS.verbose >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break

        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(),
                              was_resign=False)
            break

        # Timing is reported on every move at verbosity 2+, or on every
        # tenth move at verbosity 1.
        if (FLAGS.verbose >= 2) or (
                FLAGS.verbose >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            elapsed = time.time() - move_start
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, num_readouts,
                elapsed / num_readouts * 100.0, elapsed), flush=True)
        if FLAGS.verbose >= 3:
            print("Played >>",
                  coords.to_gtp(coords.from_flat(player.root.fmove)))

    if FLAGS.verbose >= 2:
        utils.dbg("%s: %.3f" % (player.result_string, player.root.Q))
        utils.dbg(player.root.position, player.root.position.score())

    return player
def mvp_gg(self):
    """Returns the most visited path as space-separated GTP coordinates.

    Intended for go-gui VAR output. The path is cut at the first node whose
    most visited child has at most one visit.
    """
    moves = []
    for step in self.most_visited_path_nodes():
        if max(step.child_N) <= 1:
            break
        gtp_move = coords.to_gtp(coords.from_flat(step.fmove))
        moves.append(gtp_move)
    return ' '.join(moves)
def test_topleft(self):
    """Checks every encoding of point (0, 8): SGF 'ia', flat 8, GTP 'J9'.

    NOTE(review): (0, 8) looks like the top-right corner of a 9x9 board
    despite the method name — confirm.
    """
    point = (0, 8)

    # Decoding each representation yields the point.
    self.assertEqual(point, coords.from_sgf('ia'))
    self.assertEqual(point, coords.from_flat(8))
    self.assertEqual(point, coords.from_gtp('J9'))

    # Encoding the point yields each representation.
    self.assertEqual('ia', coords.to_sgf(point))
    self.assertEqual(8, coords.to_flat(point))
    self.assertEqual('J9', coords.to_gtp(point))
def test_upperleft(self):
    """Checks every encoding of point (0, 0): SGF 'aa', flat 0, GTP 'A9'."""
    point = (0, 0)

    # Decoding each representation yields the point.
    self.assertEqual(point, coords.from_sgf('aa'))
    self.assertEqual(point, coords.from_flat(0))
    self.assertEqual(point, coords.from_gtp('A9'))

    # Encoding the point yields each representation.
    self.assertEqual('aa', coords.to_sgf(point))
    self.assertEqual(0, coords.to_flat(point))
    self.assertEqual('A9', coords.to_gtp(point))
def most_visited_path(self):
    """Returns the most visited path as one line of text.

    Each node on the path is rendered as "<move> (<N>) ==> ", followed by
    the Q of the last node on the path (or of this node when the path is
    empty) and a newline.
    """
    path = list(self.most_visited_path_nodes())
    pieces = [
        "%s (%d) ==> " % (coords.to_gtp(coords.from_flat(step.fmove)), step.N)
        for step in path
    ]
    last = path[-1] if path else self
    pieces.append("Q: {:.5f}\n".format(last.Q))
    return ''.join(pieces)
def test_pass(self):
    """Checks that a pass is None as a point and round-trips in each encoding."""
    # Every decoder maps its pass representation to None.
    for sgf in ('', 'tt'):
        self.assertEqual(None, coords.from_sgf(sgf))
    self.assertEqual(None, coords.from_flat(81))
    for gtp in ('pass', 'PASS'):
        self.assertEqual(None, coords.from_gtp(gtp))

    # Every encoder maps None to its pass representation.
    self.assertEqual('', coords.to_sgf(None))
    self.assertEqual(81, coords.to_flat(None))
    self.assertEqual('pass', coords.to_gtp(None))
def maybe_add_child(self, fcoord):
    """Returns the child node for fcoord, creating it first if necessary.

    A new child is built by playing the move on both this node's position
    and its game state (the latter takes the move in GTP form).
    """
    if fcoord in self.children:
        return self.children[fcoord]

    move = coords.from_flat(fcoord)
    child_position = self.position.play_move(move)
    child_game_state = self.game_state.play_move(coords.to_gtp(move))
    child = MCTSNode(
        child_position, child_game_state, fmove=fcoord, parent=self)
    self.children[fcoord] = child
    return child
def simulate(network, board=None, steps=20):
    '''
    Plays the network's highest-policy move for `steps` moves, printing the
    chosen move and the resulting position each time (to help understand
    the tactic).
    '''
    pos = Position(board=board)
    for _ in range(steps):
        # Only the policy output is used; the value estimate is ignored.
        policy, _value = network.run(pos)
        move = coords.from_flat(np.argmax(policy))
        print('Best move', coords.to_gtp(move))
        pos = pos.play_move(move)
        print(pos)
def cmd_genmove(self, color=None):
    """Picks and plays a move, returning it as a GTP string.

    Returns "pass" without searching when courtesy passing is enabled and
    the previous move was a pass, and "resign" when the player decides to
    resign. Otherwise the suggested move is played and returned.

    Args:
      color: optional color to move; when given it is handed to
        _accomodate_out_of_turn() before anything else happens.
    """
    if color is not None:
        self._accomodate_out_of_turn(color)
    player = self._player

    if self._courtesy_pass:
        # If courtesy pass is True and the previous move was a pass, we'll
        # pass too, regardless of score or our opinion on the game.
        recent = player.get_position().recent
        if recent and recent[-1].move is None:
            return "pass"

    move = player.suggest_move(player.get_position())

    if player.should_resign():
        # The side to move resigns, so the opposite color gets the result.
        player.set_result(-1 * player.get_position().to_play,
                          was_resign=True)
        return "resign"

    player.play_move(move)
    if player.get_root().is_done():
        player.set_result(player.get_position().result(), was_resign=False)
    return coords.to_gtp(move)
def play_mcts(network, board=None):
    """Plots an MCTS-based perturbation saliency map for `board`.

    The move picked by MCTS on the original position is compared with the
    visit distribution obtained after removing each stone in turn. Points
    marked -1 (perturbation makes the move illegal, a non-positive change in
    probability, or cross_entropy_mcts() returning -1) are shown at the
    maximum saliency, and values below max / 1.5 are zeroed before plotting.

    Args:
      network: network handed to get_mcts_player().
      board: board array with 1 / -1 marking stones. Although the default is
        None, the board is indexed directly, so an array must be supplied.

    Returns:
      The MCTS player created most recently (for the last legal
      perturbation, or for the original position if there was none).
    """
    pos = Position(board=board)
    player = get_mcts_player(network, pos)
    node = player.root
    children = node.rank_children()
    soft_n = node.child_N / max(1, sum(node.child_N))

    original_moves = {}
    # np.float was removed in NumPy 1.24; it was an alias of the builtin
    # float, i.e. float64.
    heatmap = np.zeros((N, N), dtype=np.float64)
    for i in children:
        if node.child_N[i] == 0:
            break
        original_moves[coords.to_gtp(coords.from_flat(i))] = soft_n[i]

    a_b_coords = player.pick_move()
    # player.play_move(move)
    a_b = coords.to_gtp(a_b_coords)

    print(original_moves)
    print("best action: ", a_b)
    print(node.position)
    p = original_moves[a_b]
    print(p)

    for i in range(N):
        for j in range(N):
            if board[i][j] == -1 or board[i][j] == 1:
                # Remove this stone and search the perturbed position.
                new_board = np.copy(board)
                new_board[i, j] = 0
                new_pos = perturb_position(pos, new_board)
                if new_pos.is_move_legal(a_b_coords):
                    player = get_mcts_player(network, new_pos)
                    node = player.root
                    print(node.position)
                    new_moves = {}
                    children = node.rank_children()
                    soft_n = node.child_N / max(1, sum(node.child_N))
                    for ch in children:
                        if node.child_N[ch] == 0:
                            break
                        new_moves[coords.to_gtp(coords.from_flat(ch))] = soft_n[ch]

                    new_a_b = player.pick_move()
                    # player.play_move(move)
                    new_a_b = coords.to_gtp(new_a_b)
                    print("---------------------")
                    if a_b in new_moves:
                        new_p = new_moves[a_b]
                    else:
                        new_p = 0.
                    print("New best move", new_a_b)
                    print("p", new_p)
                    print("------------------")

                    K = cross_entropy_mcts(original_moves, new_moves, a_b)
                    if K == -1:
                        print("index", i, j)
                        heatmap[i, j] = -1.0
                        continue
                    dP = p - new_p
                    if dP > 0:
                        heatmap[i, j] = 2.0*dP/(1. + dP*K)
                else:
                    heatmap[i, j] = -1.0

    # Points marked -1 are displayed at the maximum observed saliency.
    heatmap[heatmap == -1] = np.max(heatmap)
    heatmap[heatmap<np.max(heatmap)/1.5] = 0
    plt.imshow(heatmap, cmap='jet')
    plt.colorbar()
    plt.show()
    return player
def play_network(network, board=None):
    '''
    Generates perturbation saliency maps for a board position and saves
    them under `save_path`.

    Each stone is removed in turn and five maps are produced: the squared
    change in value (atariV), the norm of the change in policy (atariP),
    their combination (atari), the drop in the best move's Q (deltaQ), and
    the cross-entropy-weighted drop in the best move's policy probability
    (entropy).

    Args:
      network: network whose run(position) returns (policy, value); it is
        also handed to get_mcts_player().
      board: board array with 1 / -1 marking stones. Although the default is
        None, the board is indexed directly, so an array must be supplied.
    '''
    def _rescale(values):
        # Min-max normalise to [0, 1]. A constant map (e.g. no stones on
        # the board) stays all zeros instead of becoming NaN from 0 / 0.
        lo, hi = np.min(values), np.max(values)
        if hi == lo:
            return np.zeros_like(values)
        return (values - lo)/(hi - lo)

    def _plot(number, image, filename, cmap=None, colorbar=True):
        # Draw one figure, save it under save_path, then display it.
        plt.figure(number)
        plt.imshow(image, cmap=cmap)
        if colorbar:
            plt.colorbar()
        plt.savefig(save_path + filename)
        plt.show()

    pos = Position(board=board)
    policy, V = network.run(pos)
    best_move = np.argmax(policy)
    print("Best Move is", coords.to_gtp(coords.from_flat(best_move)))
    p = np.max(policy)

    player = get_mcts_player(network, pos)
    node = player.root
    old_Q = node.child_Q[best_move]

    # np.float was removed in NumPy 1.24; it was an alias of the builtin
    # float, i.e. float64.
    atariV = np.zeros([N, N], dtype=np.float64)
    atariP = np.zeros([N, N], dtype=np.float64)
    delQ = np.zeros([N, N], dtype=np.float64)
    heatmap = np.zeros([N, N], dtype=np.float64)

    for i in range(N):
        for j in range(N):
            if board[i, j] == 1 or board[i, j] == -1:
                print(i, j)
                print("---------------------")
                # Remove this stone and re-evaluate the position.
                new_board = np.copy(board)
                new_board[i, j] = 0
                new_pos = perturb_position(pos, new_board)
                new_policy, new_V = network.run(new_pos)
                new_p = new_policy[best_move]

                # Search the perturbed position. Searching the original
                # `pos` again would only compare old_Q with itself.
                player = get_mcts_player(network, new_pos)
                node = player.root
                new_Q = node.child_Q[best_move]

                atariV[i, j] = 0.5*((V - new_V)**2)
                atariP[i, j] = 0.5*np.linalg.norm(policy - new_policy)
                dP = p - new_p
                dQ = old_Q - new_Q
                K = cross_entropy(policy, new_policy, best_move)
                if dP>0:
                    heatmap[i, j] = 2*dP/(1 + dP*K)
                if dQ>0:
                    delQ[i, j] = dQ

    atariV = _rescale(atariV)
    atariP = _rescale(atariP)

    frame = np.zeros((N,N,3))
    frame = saliency_combine(atariV, frame, blur=256, channel=2)
    frame = saliency_combine(atariP, frame, blur=256, channel=0)

    _plot(1, atariV, 'atariV.png', cmap='Reds')
    _plot(2, atariP, 'atariP.png', cmap='Reds')
    _plot(3, frame, 'atari.png', colorbar=False)
    _plot(4, delQ, 'deltaQ.png', cmap='Reds')
    _plot(5, heatmap, 'entropy.png', cmap='Reds')
def extract_move_data(root_node, worker_id, completed_time, board_size):
    """Builds one record per move from a parsed SGF game.

    Walks the `next` chain starting after `root_node`. Each node must carry
    a 'B' or 'W' move property and a 'C' comment that parse_comment_node()
    can decode.

    Args:
      root_node: root of the SGF node chain; the first move is
        root_node.next.
      worker_id: value copied into every record.
      completed_time: value copied into every record.
      board_size: board edge length; sizes the per-move rows when
        FLAGS.only_top_move is off.

    Returns:
      A list of dicts, one per move, in game order.

    Raises:
      ValueError: if a node has neither a 'B' nor a 'W' property.
    """
    column_names = ["prior", "orig_prior", "N", "soft_N"]

    def get_row_data(debug_row):
        return [getattr(debug_row, field) for field in column_names]

    move_data = []
    move_num = 1
    current_node = root_node.next
    while current_node is not None:
        props = current_node.properties
        if 'B' in props:
            to_play = 1
            move_played = props['B'][0]
        elif 'W' in props:
            to_play = -1
            move_played = props['W'][0]
        else:
            # Fail loudly. Carrying on would reuse the previous node's move
            # (or hit an unbound name on the first node).
            raise ValueError(
                "SGF node %d has neither a 'B' nor a 'W' property" % move_num)
        move_played = coords.to_flat(coords.from_sgf(move_played))
        post_Q, debug_rows = parse_comment_node(props['C'][0])

        if FLAGS.only_top_move:
            assert len(debug_rows) <= 1
            row_data = list(map(get_row_data, debug_rows))
        else:
            # One row per move (including the extra N * N entry), zero-filled
            # for moves without a debug row.
            row_data = [[0] * 4 for _ in range(board_size * board_size + 1)]
            for debug_row in debug_rows:
                row_data[debug_row.move] = get_row_data(debug_row)

        if row_data:
            policy_prior, policy_prior_orig, mcts_visits, mcts_visits_norm = \
                zip(*row_data)
        else:
            # No debug rows at all: emit empty columns rather than failing
            # to unpack an empty zip().
            policy_prior = policy_prior_orig = ()
            mcts_visits = mcts_visits_norm = ()

        move_data.append({
            'worker_id': worker_id,
            'completed_time': completed_time,
            'move_num': move_num,
            'turn_to_play': to_play,
            'move': move_played,
            'move_kgs': coords.to_gtp(coords.from_flat(move_played)),
            'prior_Q': None,
            'post_Q': post_Q,
            'policy_prior': policy_prior,
            'policy_prior_orig': policy_prior_orig,
            'mcts_visit_counts': mcts_visits,
            'mcts_visit_counts_norm': mcts_visits_norm,
        })
        move_num += 1
        current_node = current_node.next
    return move_data
def _heatmap(self, sort_order, node, prop):
    """Returns up to 20 "<move> <value>" lines for the visited children of `node`.

    Args:
      sort_order: iterable of flat move indices, in the order to report.
      node: node whose children are reported; only moves with
        child_N > 0 are included.
      prop: name of the per-move array to print. It is looked up in the
        node's instance __dict__, so it must be an instance attribute.
    """
    values = node.__dict__.get(prop)
    rows = []
    for key in sort_order:
        if node.child_N[key] > 0:
            rows.append("{!s:6} {}".format(
                coords.to_gtp(coords.from_flat(key)), values[key]))
    return "\n".join(rows[:20])
def fmt(move):
    """Formats a move as "<b|w>-<GTP coordinate>", e.g. a black move as "b-...".

    Any color other than go.BLACK is rendered with the 'w' prefix.
    """
    prefix = 'b' if move.color == go.BLACK else 'w'
    location = coords.to_gtp(move.move)
    return "{}-{}".format(prefix, location)