def test_parsing_9x9(self):
    """Exercise every coordinate scheme's parser/unparser pair."""
    # SGF: two lowercase letters, column first; '' encodes a pass.
    self.assertEqual(coords.parse_sgf_coords('aa'), (0, 0))
    self.assertEqual(coords.parse_sgf_coords('ac'), (2, 0))
    self.assertEqual(coords.parse_sgf_coords('ca'), (0, 2))
    self.assertEqual(coords.parse_sgf_coords(''), None)
    self.assertEqual(coords.unparse_sgf_coords(None), '')
    # Round-tripping through parse/unparse should be the identity.
    self.assertEqual(
        'aa', coords.unparse_sgf_coords(coords.parse_sgf_coords('aa')))
    self.assertEqual(
        'sa', coords.unparse_sgf_coords(coords.parse_sgf_coords('sa')))
    self.assertEqual(
        (1, 17), coords.parse_sgf_coords(coords.unparse_sgf_coords((1, 17))))
    # KGS: letter-number pairs with row 1 at the bottom of the board.
    self.assertEqual(coords.parse_kgs_coords('A1'), (8, 0))
    self.assertEqual(coords.parse_kgs_coords('A9'), (0, 0))
    self.assertEqual(coords.parse_kgs_coords('C2'), (7, 2))
    self.assertEqual(coords.parse_kgs_coords('J2'), (7, 8))
    # pygtp: 1-indexed (column, row) pairs, row 1 at the bottom.
    self.assertEqual(coords.parse_pygtp_coords((1, 1)), (8, 0))
    self.assertEqual(coords.parse_pygtp_coords((1, 9)), (0, 0))
    self.assertEqual(coords.parse_pygtp_coords((3, 2)), (7, 2))
    self.assertEqual(coords.unparse_pygtp_coords((8, 0)), (1, 1))
    self.assertEqual(coords.unparse_pygtp_coords((0, 0)), (1, 9))
    self.assertEqual(coords.unparse_pygtp_coords((7, 2)), (3, 2))
    # Human-readable output.
    self.assertEqual(coords.to_human_coord((0, 8)), 'J9')
    self.assertEqual(coords.to_human_coord((8, 0)), 'A1')
def heatmap(self, sort_order, node, prop):
    """Render up to 20 rows of '<human coord> <value>' for the given node.

    Only positions that were actually visited (node.child_N[key] > 0) are
    shown; `prop` names the per-move attribute of `node` to display, and
    `sort_order` determines row order.
    """
    values = node.__dict__.get(prop)
    rows = []
    for key in sort_order:
        if node.child_N[key] <= 0:
            continue
        label = coords.to_human_coord(coords.unflatten_coords(key))
        rows.append("{!s:6} {}".format(label, values[key]))
    return "\n".join(rows[:20])
def describe(self):
    """Return a multi-line summary of this node's search statistics."""
    # Consider every move plus the pass move (go.N * go.N + 1 entries),
    # most-visited (then highest action score) first.
    sort_order = list(range(go.N * go.N + 1))
    sort_order.sort(key=lambda i: (
        self.child_N[i], self.child_action_score[i]), reverse=True)
    # soft_n: each child's share of total visits.
    soft_n = self.child_N / sum(self.child_N)
    # p_delta / p_rel: absolute and relative drift of the visit
    # distribution away from the network's prior.
    # NOTE(review): p_rel divides by child_prior; a zero prior would
    # yield inf/nan here -- presumably priors are strictly positive.
    p_delta = soft_n - self.child_prior
    p_rel = p_delta / self.child_prior
    # Dump out some statistics
    output = []
    output.append("{q:.4f}\n".format(q=self.Q))
    output.append(self.most_visited_path())
    output.append(
        "move: action Q U P P-Dir N soft-N p-delta p-rel\n")
    # One row per move, top 15 by the sort order above.
    output.append("\n".join([
        "{!s:6}: {: .3f}, {: .3f}, {:.3f}, {:.3f}, {:.3f}, {:4d} {:.4f} {: .5f} {: .2f}"
        .format(coords.to_human_coord(coords.unflatten_coords(key)),
                self.child_action_score[key],
                self.child_Q[key],
                self.child_U[key],
                self.child_prior[key],
                self.original_prior[key],
                int(self.child_N[key]),
                soft_n[key],
                p_delta[key],
                p_rel[key])
        for key in sort_order][:15]))
    return ''.join(output)
def play(network, readouts, resign_threshold, verbosity=0):
    '''Plays out a self-play match, returning
    - the final position
    - the n x 362 tensor of floats representing the mcts search probabilities
    - the n-ary tensor of floats representing the original value-net estimate
    where n is the number of moves in the game'''
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)
    # Disable resign in 5% of games so some games always play to the end.
    if random.random() < 0.05:
        player.resign_threshold = -0.9999

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        # Sets is_done to be True if player.should resign.
        if player.should_resign():  # TODO: make this less side-effecty.
            break
        move = player.pick_move()
        player.play_move(move)
        if player.is_done():
            # TODO: actually handle the result instead of ferrying it
            # around as a property.
            player.result = player.position.result()
            break

        if (verbosity >= 2) or (
                verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {}".format(player.root.Q))
            dur = time.time() - start
            # Fixed: "s/100." means seconds per 100 readouts, so scale by
            # * 100.0 (was mistakenly / 100.0).
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts, dur / readouts * 100.0, dur),
                flush=True)
        if verbosity >= 3:
            print("Played >>", coords.to_human_coord(
                coords.unflatten_coords(player.root.fmove)))

    # TODO: break when i >= 2 * go.N * go.N (where is this being done now??...)
    return player
def test_pass(self):
    """A pass is None internally; each scheme has its own sentinel for it."""
    # Parsing each scheme's pass sentinel yields None.
    self.assertEqual(coords.parse_sgf_coords(''), None)
    self.assertEqual(coords.unflatten_coords(81), None)
    self.assertEqual(coords.parse_kgs_coords('pass'), None)
    self.assertEqual(coords.parse_pygtp_coords((0, 0)), None)
    # Unparsing None yields each scheme's pass sentinel.
    self.assertEqual(coords.unparse_sgf_coords(None), '')
    self.assertEqual(coords.flatten_coords(None), 81)
    self.assertEqual(coords.to_human_coord(None), 'pass')
    self.assertEqual(coords.unparse_pygtp_coords(None), (0, 0))
def test_topleft(self):
    """All coordinate schemes agree on the corner point (0, 8)."""
    expected = (0, 8)
    # Parsing each scheme's spelling of the corner.
    self.assertEqual(coords.parse_sgf_coords('ia'), expected)
    self.assertEqual(coords.unflatten_coords(8), expected)
    self.assertEqual(coords.parse_kgs_coords('J9'), expected)
    self.assertEqual(coords.parse_pygtp_coords((9, 9)), expected)
    # ...and rendering the corner back out in each scheme.
    self.assertEqual(coords.unparse_sgf_coords(expected), 'ia')
    self.assertEqual(coords.flatten_coords(expected), 8)
    self.assertEqual(coords.to_human_coord(expected), 'J9')
    self.assertEqual(coords.unparse_pygtp_coords(expected), (9, 9))
def test_upperleft(self):
    """All coordinate schemes agree on the corner point (0, 0)."""
    expected = (0, 0)
    # Parsing each scheme's spelling of the corner.
    self.assertEqual(coords.parse_sgf_coords('aa'), expected)
    self.assertEqual(coords.unflatten_coords(0), expected)
    self.assertEqual(coords.parse_kgs_coords('A9'), expected)
    self.assertEqual(coords.parse_pygtp_coords((1, 9)), expected)
    # ...and rendering the corner back out in each scheme.
    self.assertEqual(coords.unparse_sgf_coords(expected), 'aa')
    self.assertEqual(coords.flatten_coords(expected), 0)
    self.assertEqual(coords.to_human_coord(expected), 'A9')
    self.assertEqual(coords.unparse_pygtp_coords(expected), (1, 9))
def mvp_gg(self):
    """Return the most-visited path in go-gui VAR format, e.g. 'b r3 w c17 ...'."""
    moves = []
    current = self
    # Walk down the tree along the most-visited child until we reach a
    # leaf or a node whose best child has at most one visit.
    while current.children and max(current.child_N) > 1:
        best_child = np.argmax(current.child_N)
        current = current.children[best_child]
        moves.append(
            "%s" % coords.to_human_coord(
                coords.unflatten_coords(current.fmove)))
    return ' '.join(moves)
def play_move(self, c, color=None, mutate=False):
    """Play a stone at coordinate `c` and return the resulting position.

    If `mutate` is true this position is modified in place; otherwise a
    deep copy is updated and returned. `c` of None is treated as a pass.
    Raises IllegalMove if the move is not legal.
    """
    # Obeys CGOS Rules of Play. In short:
    # No suicides
    # Chinese/area scoring
    # Positional superko (this is very crudely approximate at the moment.)
    if color is None:
        color = self.to_play

    pos = self if mutate else copy.deepcopy(self)

    if c is None:
        # Pass moves are delegated entirely to pass_move().
        pos = pos.pass_move(mutate=mutate)
        return pos

    if not self.is_move_legal(c):
        raise IllegalMove("{} move at {} is illegal: \n{}".format(
            "Black" if self.to_play == BLACK else "White",
            coords.to_human_coord(c), self))

    # Check before placing: a "koish" point is surrounded by one color,
    # which is the precondition for the new move creating a ko.
    potential_ko = is_koish(self.board, c)

    place_stones(pos.board, color, [c])
    captured_stones = pos.lib_tracker.add_stone(color, c)
    place_stones(pos.board, EMPTY, captured_stones)

    opp_color = color * -1

    # Record everything that changed on the board this move: the placed
    # stone plus any captures.
    new_board_delta = np.zeros([N, N], dtype=np.int8)
    new_board_delta[c] = color
    place_stones(new_board_delta, color, captured_stones)

    # A single capture of an opponent stone at a koish point creates a ko.
    if len(captured_stones) == 1 and potential_ko == opp_color:
        new_ko = list(captured_stones)[0]
    else:
        new_ko = None

    # caps is (black captures, white captures).
    if pos.to_play == BLACK:
        new_caps = (pos.caps[0] + len(captured_stones), pos.caps[1])
    else:
        new_caps = (pos.caps[0], pos.caps[1] + len(captured_stones))

    pos.n += 1
    pos.caps = new_caps
    pos.ko = new_ko
    pos.recent += (PlayerMove(color, c),)

    # keep a rolling history of last 7 deltas - that's all we'll need to
    # extract the last 8 board states.
    pos.board_deltas = np.concatenate((
        new_board_delta.reshape(1, N, N),
        pos.board_deltas[:6]))
    pos.to_play *= -1
    return pos
def play_move(self, c, color=None, mutate=False):
    """Play a stone at coordinate `c` and return the resulting position.

    If `mutate` is true this position is modified in place; otherwise a
    deep copy is updated and returned. `c` of None is treated as a pass.
    Raises IllegalMove if the move is not legal.
    """
    # Obeys CGOS Rules of Play. In short:
    # No suicides
    # Chinese/area scoring
    # Positional superko (this is very crudely approximate at the moment.)
    if color is None:
        color = self.to_play

    pos = self if mutate else copy.deepcopy(self)

    if c is None:
        # Pass moves are delegated entirely to pass_move().
        pos = pos.pass_move(mutate=mutate)
        return pos

    if not self.is_move_legal(c):
        raise IllegalMove("{} move at {} is illegal: \n{}".format(
            "Black" if self.to_play == BLACK else "White",
            coords.to_human_coord(c), self))

    # Check before placing: a "koish" point is surrounded by one color,
    # which is the precondition for the new move creating a ko.
    potential_ko = is_koish(self.board, c)

    place_stones(pos.board, color, [c])
    captured_stones = pos.lib_tracker.add_stone(color, c)
    place_stones(pos.board, EMPTY, captured_stones)

    opp_color = color * -1

    # Record everything that changed on the board this move: the placed
    # stone plus any captures.
    new_board_delta = np.zeros([N, N], dtype=np.int8)
    new_board_delta[c] = color
    place_stones(new_board_delta, color, captured_stones)

    # A single capture of an opponent stone at a koish point creates a ko.
    if len(captured_stones) == 1 and potential_ko == opp_color:
        new_ko = list(captured_stones)[0]
    else:
        new_ko = None

    # caps is (black captures, white captures).
    if pos.to_play == BLACK:
        new_caps = (pos.caps[0] + len(captured_stones), pos.caps[1])
    else:
        new_caps = (pos.caps[0], pos.caps[1] + len(captured_stones))

    pos.n += 1
    pos.caps = new_caps
    pos.ko = new_ko
    pos.recent += (PlayerMove(color, c), )

    # keep a rolling history of last 7 deltas - that's all we'll need to
    # extract the last 8 board states.
    pos.board_deltas = np.concatenate(
        (new_board_delta.reshape(1, N, N), pos.board_deltas[:6]))
    pos.to_play *= -1
    return pos
def most_visited_path(self):
    """Return a string tracing the most-visited child chain from this node.

    Each step renders as '<move> (<visits>) ==> '; the trace ends with the
    Q value of the last node reached. If the most-visited child has not
    been expanded, the trace notes 'GAME END' instead of descending.
    """
    node = self
    output = []
    while node.children:
        next_kid = np.argmax(node.child_N)
        next_node = node.children.get(next_kid)
        if next_node is None:
            # Most-visited child was never expanded; stop here.
            # Fixed: previously `node` itself was overwritten with None,
            # so the node.Q line below raised AttributeError.
            output.append("GAME END")
            break
        node = next_node
        output.append("%s (%d) ==> " % (coords.to_human_coord(
            coords.unflatten_coords(node.fmove)), node.N))
    output.append("Q: {:.5f}\n".format(node.Q))
    return ''.join(output)
def show_path_to_root(self, node):
    """Return a string of the moves leading from the search root to `node`.

    Returns None when the position has no move history. Appends a note
    (with the position's score) when the depth cutoff was reached or the
    game is over.
    """
    pos = node.position
    diff = node.position.n - self.root.position.n
    # Truthiness instead of len() == 0 (PEP 8).
    if not pos.recent:
        return

    # A proper def instead of a lambda bound to a name (PEP 8 E731).
    def fmt(move):
        # One move as "<color letter>-<human coordinate>", e.g. "b-Q16".
        return "{}-{}".format('b' if move.color == 1 else 'w',
                              coords.to_human_coord(move.move))

    # NOTE(review): when diff == 0, recent[-0:] is the entire history --
    # presumably node is always deeper than the root; verify.
    path = " ".join(fmt(move) for move in pos.recent[-diff:])
    if node.position.n >= MAX_DEPTH:
        path += " (depth cutoff reached) %0.1f" % node.position.score()
    elif node.position.is_game_over():
        path += " (game over) %0.1f" % node.position.score()
    return path
def play(network, readouts, resign_threshold, verbosity=0):
    ''' Plays out a self-play match, returning
    - the final position
    - the n x 362 tensor of floats representing the mcts search probabilities
    - the n-ary tensor of floats representing the original value-net estimate
    where n is the number of moves in the game'''
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)
    global_n = 0  # NOTE(review): appears unused in this function.

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if (verbosity >= 3):
            print(player.root.position)
            print(player.root.describe())

        # Resignation: record the opposite of the side to move as winner.
        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(),
                              was_resign=False)
            break

        if (verbosity >= 2) or (
                verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            # s/100. = seconds per 100 readouts.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>", coords.to_human_coord(
                coords.unflatten_coords(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player
def heatmap(self, sort_order, node, prop):
    """Render up to 20 rows of '<human coord> <value>' for visited moves.

    `prop` names the per-move attribute of `node` to display; only keys
    with node.child_N[key] > 0 are included, in `sort_order` order.
    """
    stats = node.__dict__.get(prop)
    lines = [
        "{!s:6} {}".format(
            coords.to_human_coord(coords.unflatten_coords(k)), stats[k])
        for k in sort_order
        if node.child_N[k] > 0
    ]
    return "\n".join(lines[:20])
def fmt(move):
    """Format one move as '<color letter>-<human coordinate>'."""
    color_tag = 'b' if move.color == 1 else 'w'
    return "{}-{}".format(color_tag, coords.to_human_coord(move.move))
def fmt(move):
    """Format one move as '<color letter>-<human coordinate>'."""
    return "{}-{}".format('b' if move.color == 1 else 'w',
                          coords.to_human_coord(move.move))
# NOTE(review): the line below references `pos` and `diff`, which are not
# defined at this level -- presumably this fragment was extracted from
# show_path_to_root, where both are locals; verify against the original.
path = " ".join(fmt(move) for move in pos.recent[-diff:])