def test_flatten(self):
    """Check flatten_coords / unflatten_coords on fixed values and round trips."""
    flatten = coords.flatten_coords
    unflatten = coords.unflatten_coords

    # Coordinate tuple -> flattened index.
    for point, flat in (((0, 0), 0), ((0, 3), 3), ((3, 0), 27)):
        self.assertEqual(flatten(point), flat)

    # Flattened index -> coordinate tuple.
    for flat, point in ((27, (3, 0)), (10, (1, 1)), (80, (8, 8))):
        self.assertEqual(unflatten(flat), point)

    # Composing the two conversions in either order gives back the input.
    self.assertEqual(flatten(unflatten(10)), 10)
    self.assertEqual(unflatten(flatten((5, 4))), (5, 4))
def test_proper_move_transform(self):
    """Check that move-vector symmetries agree with board symmetries."""
    # Check that the reinterpretation of 362 = 19*19 + 1 during symmetry
    # application is consistent with coords.unflatten_coords
    num_points = go.N ** 2

    # One entry per board point plus one trailing extra entry.
    move_array = np.arange(num_points + 1)

    # A board on which every point stores its own flattened index.
    coord_array = np.zeros([go.N, go.N])
    for flat in range(num_points):
        coord_array[coords.unflatten_coords(flat)] = flat

    for s in symmetries.SYMMETRIES:
        with self.subTest(symmetry=s):
            transformed_moves = apply_p(s, move_array)
            transformed_board = apply_f(s, coord_array)
            # The last entry has no counterpart on the N x N board array,
            # so it is left out of the comparison.
            for new_flat, old_flat in enumerate(transformed_moves[:-1]):
                board_value = transformed_board[
                    coords.unflatten_coords(new_flat)]
                self.assertEqual(old_flat, board_value)
def test_proper_move_transform(self):
    """Verify apply_p on a move vector matches apply_f on the board."""
    # Check that the reinterpretation of 362 = 19*19 + 1 during symmetry
    # application is consistent with coords.unflatten_coords
    board_points = go.N**2
    flat_moves = np.arange(board_points + 1)

    # Label each board point with the flattened index that maps to it.
    labelled_board = np.zeros([go.N, go.N])
    for index in range(board_points):
        labelled_board[coords.unflatten_coords(index)] = index

    for s in symmetries.SYMMETRIES:
        with self.subTest(symmetry=s):
            moved = apply_p(s, flat_moves)
            board = apply_f(s, labelled_board)
            # Compare every entry except the trailing one, which does not
            # correspond to a point of the labelled board.
            on_board = moved[:-1]
            for target, source in enumerate(on_board):
                location = coords.unflatten_coords(target)
                self.assertEqual(source, board[location])
def describe(self):
    """Return a text summary of the search statistics at this node.

    The summary consists of this node's Q, the most visited path, a column
    header, and one row for each of the 15 highest ranked children. Children
    are ranked by visit count, with action score as the tie-breaker.

    Returns:
        The summary as a single string.
    """
    # Read every per-child statistic once instead of once per sort key and
    # once per printed row.
    visits = self.child_N
    action_score = self.child_action_score
    child_q = self.child_Q
    child_u = self.child_U
    prior = self.child_prior
    original_prior = self.original_prior

    sort_order = sorted(range(go.N * go.N + 1),
                        key=lambda i: (visits[i], action_score[i]),
                        reverse=True)

    # With no visits at all the normalisation would be 0 / 0; report a
    # visit share of zero for every child instead.
    total_visits = sum(visits)
    soft_n = visits / total_visits if total_visits else visits * 0.0
    p_delta = soft_n - prior

    def relative_delta(key):
        # A zero prior has no meaningful relative change; show 0 rather
        # than dividing by zero.
        return p_delta[key] / prior[key] if prior[key] else 0.0

    # Dump out some statistics
    output = []
    output.append("{q:.4f}\n".format(q=self.Q))
    output.append(self.most_visited_path())
    output.append(
        "move: action Q U P P-Dir N soft-N p-delta p-rel\n"
    )
    # Only the first 15 ranked children are shown, so only those are
    # formatted.
    output.append("\n".join([
        "{!s:6}: {: .3f}, {: .3f}, {:.3f}, {:.3f}, {:.3f}, {:4d} {:.4f} {: .5f} {: .2f}"
        .format(coords.to_human_coord(coords.unflatten_coords(key)),
                action_score[key],
                child_q[key],
                child_u[key],
                prior[key],
                original_prior[key],
                int(visits[key]),
                soft_n[key],
                p_delta[key],
                relative_delta(key))
        for key in sort_order[:15]
    ]))
    return ''.join(output)
def heatmap(self, sort_order, node, prop):
    """Format up to 20 visited children of `node` as "<move> <value>" lines.

    Args:
        sort_order: flattened move indices, in the order rows should appear.
        node: object exposing `child_N` and the statistic named by `prop`,
            both indexable by flattened move index.
        prop: name of the per-child statistic on `node` to print.

    Returns:
        The rows joined by newlines. Only moves whose `child_N` entry is
        positive are listed, and at most the first 20 of those.
    """
    # getattr also resolves properties, class attributes and slots, which
    # node.__dict__.get() cannot see. A name that does not exist at all
    # still yields None, so indexing it fails exactly as it did before.
    values = getattr(node, prop, None)

    # Pick the rows first so that only the rows actually shown are formatted.
    visited = [key for key in sort_order if node.child_N[key] > 0][:20]
    return "\n".join([
        "{!s:6} {}".format(
            coords.to_human_coord(coords.unflatten_coords(key)),
            values[key])
        for key in visited
    ])
def play(network, readouts, resign_threshold, verbosity=0):
    ''' Plays out a self-play match and returns the player that played it.

    Args:
        network: object whose run(position) returns a (prob, val) pair; it
            is also handed to MCTSPlayer.
        readouts: number of additional readouts to perform before each move.
        resign_threshold: resign threshold passed to MCTSPlayer. In 5% of
            games it is replaced by -0.9999.
        verbosity: 0 is silent; >= 1 prints timing on every tenth move;
            >= 2 prints timing on every move; >= 3 also prints the position,
            the root's statistics and the move played.

    Returns:
        The MCTSPlayer once the game has ended by resignation or by
        player.is_done().
    '''
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -0.9999

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if (verbosity >= 3):
            print(player.root.position)
            print(player.root.describe())

        # Sets is_done to be True if player.should resign.
        if player.should_resign():  # TODO: make this less side-effecty.
            break
        move = player.pick_move()
        player.play_move(move)
        if player.is_done():
            # TODO: actually handle the result instead of ferrying it around as a property.
            player.result = player.position.result()
            break

        if (verbosity >= 2) or (verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {}".format(player.root.Q))
            dur = time.time() - start
            # The label reads "s/100", i.e. seconds per 100 readouts, so the
            # per-readout time is multiplied (not divided) by 100.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts, dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_human_coord(coords.unflatten_coords(player.root.fmove)))

    # TODO: break when i >= 2 * go.N * go.N (where is this being done now??...)

    return player
def maybe_add_child(self, fcoord):
    """Return the child node for `fcoord`, creating it on first request.

    A new child holds the position reached by playing the move that
    `fcoord` unflattens to, and records `fcoord` and this node as its
    `fmove` and `parent`.
    """
    if fcoord in self.children:
        return self.children[fcoord]

    move = coords.unflatten_coords(fcoord)
    next_position = self.position.play_move(move)
    self.children[fcoord] = MCTSNode(
        next_position, fmove=fcoord, parent=self)
    return self.children[fcoord]
def test_upperleft(self):
    """Every coordinate notation agrees on the (0, 0) point."""
    corner = (0, 0)

    # External notation -> internal coordinate tuple.
    parse_cases = (
        (coords.parse_sgf_coords, 'aa'),
        (coords.unflatten_coords, 0),
        (coords.parse_kgs_coords, 'A9'),
        (coords.parse_pygtp_coords, (1, 9)),
    )
    for parse, external in parse_cases:
        self.assertEqual(parse(external), corner)

    # Internal coordinate tuple -> external notation.
    unparse_cases = (
        (coords.unparse_sgf_coords, 'aa'),
        (coords.flatten_coords, 0),
        (coords.to_human_coord, 'A9'),
        (coords.unparse_pygtp_coords, (1, 9)),
    )
    for unparse, external in unparse_cases:
        self.assertEqual(unparse(corner), external)
def test_topleft(self):
    """Every coordinate notation agrees on the (0, 8) point.

    NOTE(review): (0, 8) / 'J9' looks like the top-right corner of a 9x9
    board rather than the top-left one the name suggests -- confirm.
    """
    point = (0, 8)
    # (conversion, argument, expected result), checked in this order.
    cases = [
        (coords.parse_sgf_coords, 'ia', point),
        (coords.unflatten_coords, 8, point),
        (coords.parse_kgs_coords, 'J9', point),
        (coords.parse_pygtp_coords, (9, 9), point),
        (coords.unparse_sgf_coords, point, 'ia'),
        (coords.flatten_coords, point, 8),
        (coords.to_human_coord, point, 'J9'),
        (coords.unparse_pygtp_coords, point, (9, 9)),
    ]
    for convert, argument, expected in cases:
        self.assertEqual(convert(argument), expected)
def test_pass(self):
    """A pass is None internally; check how each notation encodes it."""
    # (parser, unparser, encoding of a pass in that notation)
    encodings = (
        (coords.parse_sgf_coords, coords.unparse_sgf_coords, ''),
        (coords.unflatten_coords, coords.flatten_coords, 81),
        (coords.parse_kgs_coords, coords.to_human_coord, 'pass'),
        (coords.parse_pygtp_coords, coords.unparse_pygtp_coords, (0, 0)),
    )

    # Parsing the pass encoding gives None ...
    for parse, _, encoded in encodings:
        self.assertEqual(parse(encoded), None)

    # ... and None is written back out as that same encoding.
    for _, unparse, encoded in encodings:
        self.assertEqual(unparse(None), encoded)
def mvp_gg(self):
    """Return the most visited path as space-separated human coordinates.

    Starting at this node, the walk repeatedly steps to the child with the
    highest visit count, and stops at a node that has no children or whose
    highest child visit count is not greater than 1. Only the moves are
    emitted, e.g. 'r3 c17'.
    """
    moves = []
    current = self
    while current.children and max(current.child_N) > 1:
        current = current.children[np.argmax(current.child_N)]
        move = coords.unflatten_coords(current.fmove)
        moves.append("%s" % coords.to_human_coord(move))
    return ' '.join(moves)
def test_legal_moves(self):
    """Check single-move and bulk legality, then again with colors flipped."""
    board = test_utils.load_board('''
        .O.O.XOX.
        O..OOOOOX
        ......O.O
        OO.....OX
        XO.....X.
        .O.......
        OX.....OO
        XX...OOOX
        .....O.X.
    ''')
    illegal_moves = parse_kgs_coords_set('A9 E9 J9')
    legal_moves = parse_kgs_coords_set('A4 G1 J1 H7') | {None}

    # First pass: the board as loaded, BLACK to play.
    # Second pass: flip the colors and check that everything is still
    # (il)legal.
    for flipped in (False, True):
        if flipped:
            position = Position(board=-board, to_play=WHITE)
        else:
            position = Position(board=board, to_play=BLACK)

        for move in illegal_moves:
            with self.subTest(type='illegal', move=move):
                self.assertFalse(position.is_move_legal(move))
        for move in legal_moves:
            with self.subTest(type='legal', move=move):
                self.assertTrue(position.is_move_legal(move))

        # check that the bulk legal test agrees with move-by-move illegal test.
        for i, bulk_legal in enumerate(position.all_legal_moves()):
            move = unflatten_coords(i)
            with self.subTest(type='bulk', move=move):
                self.assertEqual(bulk_legal, position.is_move_legal(move))
def test_legal_moves(self):
    """Legality must hold for the board and for its color-flipped twin."""
    board = test_utils.load_board('''
        .O.O.XOX.
        O..OOOOOX
        ......O.O
        OO.....OX
        XO.....X.
        .O.......
        OX.....OO
        XX...OOOX
        .....O.X.
    ''')
    illegal_moves = parse_kgs_coords_set('A9 E9 J9')
    legal_moves = parse_kgs_coords_set('A4 G1 J1 H7') | {None}

    def check_position(position):
        # Known-illegal and known-legal moves, one at a time.
        for move in illegal_moves:
            with self.subTest(type='illegal', move=move):
                self.assertFalse(position.is_move_legal(move))
        for move in legal_moves:
            with self.subTest(type='legal', move=move):
                self.assertTrue(position.is_move_legal(move))
        # check that the bulk legal test agrees with move-by-move illegal
        # test.
        bulk_legality = position.all_legal_moves()
        for flat, bulk_legal in enumerate(bulk_legality):
            with self.subTest(type='bulk', move=unflatten_coords(flat)):
                self.assertEqual(
                    bulk_legal,
                    position.is_move_legal(unflatten_coords(flat)))

    check_position(Position(board=board, to_play=BLACK))
    # flip the colors and check that everything is still (il)legal
    check_position(Position(board=-board, to_play=WHITE))
def most_visited_path(self):
    """Return a one-line description of the most visited line of play.

    Starting at this node, repeatedly step to the child with the highest
    visit count, emitting "<move> (<N>) ==> " for each step. The walk ends
    at a node without children, or with "GAME END " when the most visited
    move has no child node. The Q of the last node reached is appended.

    Returns:
        The path description, terminated by "Q: <value>" and a newline.
    """
    node = self
    output = []
    while node.children:
        next_kid = np.argmax(node.child_N)
        child = node.children.get(next_kid)
        if child is None:
            # Keep `node` on the last real node: rebinding it to None here
            # made the Q lookup below raise AttributeError.
            output.append("GAME END ")
            break
        node = child
        output.append("%s (%d) ==> " % (
            coords.to_human_coord(coords.unflatten_coords(node.fmove)),
            node.N))
    output.append("Q: {:.5f}\n".format(node.Q))
    return ''.join(output)
def pick_move(self):
    '''Picks a move to play, based on MCTS readout statistics.

    Highest N is most robust indicator. In the early stage of the game, pick
    a move weighted by visit count; later on, pick the absolute max.

    Returns:
        The chosen move, as produced by coords.unflatten_coords.
    '''
    if self.root.position.n > self.temp_threshold:
        fcoord = np.argmax(self.root.child_N)
    else:
        # Inverse-CDF sampling over the normalised visit counts.
        cdf = self.root.child_N.cumsum()
        cdf /= cdf[-1]
        selection = random.random()
        # side='right' finds the first entry strictly greater than the
        # draw, so an unvisited child can never be selected. The default
        # side='left' returns index 0 for a draw of exactly 0.0, even when
        # child 0 has no visits.
        fcoord = cdf.searchsorted(selection, side='right')
        assert self.root.child_N[fcoord] != 0
    return coords.unflatten_coords(fcoord)
def pick_move(self):
    '''Picks a move to play, based on MCTS readout statistics.

    Highest N is most robust indicator. In the early stage of the game, pick
    a move weighted by visit count; later on, pick the absolute max.

    Returns:
        The chosen move, as produced by coords.unflatten_coords.
    '''
    if self.root.position.n > self.temp_threshold:
        fcoord = np.argmax(self.root.child_N)
    else:
        # Cumulative visit counts, scaled so that the last entry is 1.
        cdf = self.root.child_N.cumsum()
        cdf /= cdf[-1]
        selection = random.random()
        # random.random() can return exactly 0.0. With the default
        # side='left' that draw maps to index 0 whether or not child 0 was
        # ever visited; side='right' only lands on entries where the
        # cumulative count actually increases.
        fcoord = cdf.searchsorted(selection, side='right')
        assert self.root.child_N[fcoord] != 0
    return coords.unflatten_coords(fcoord)
def heatmap(self, sort_order, node, prop):
    """List one statistic for up to 20 visited children of `node`.

    Args:
        sort_order: flattened move indices, in the order rows should appear.
        node: object exposing `child_N` and the statistic named by `prop`,
            both indexable by flattened move index.
        prop: name of the per-child statistic on `node` to print.

    Returns:
        Newline-joined "<move> <value>" rows for the first 20 entries of
        `sort_order` whose `child_N` entry is positive.
    """
    # Look the statistic up once, and through getattr so that properties,
    # class attributes and slots work too; node.__dict__ only holds plain
    # instance attributes. An unknown name still gives None, which fails on
    # indexing just as the __dict__ lookup did.
    values = getattr(node, prop, None)

    # Select the rows before formatting, so rows beyond the first 20 are
    # never built.
    shown = [key for key in sort_order if node.child_N[key] > 0][:20]
    return "\n".join(["{!s:6} {}".format(
        coords.to_human_coord(coords.unflatten_coords(key)),
        values[key])
        for key in shown])
def eval_player(player, positions, moves, results):
    """Compare a player's policy and value output against recorded games.

    Args:
        player: passed through to batch_run_many.
        positions: positions to evaluate, passed to batch_run_many.
        moves: the recorded move for each position.
        results: the recorded result for each position.

    Returns:
        A (top_move_agree, square_err) pair. top_move_agree[i] tells whether
        the highest-probability move equals moves[i]; square_err is the
        squared difference between values and results, divided by 4.
    """
    probs, values = batch_run_many(player, positions)

    # Highest-probability flattened move per position, as a coordinate.
    best_flat = np.argmax(probs, axis=1)
    policy_moves = list(map(coords.unflatten_coords, best_flat))

    top_move_agree = [
        move == policy_moves[idx] for idx, move in enumerate(moves)
    ]
    # NOTE(review): the division by 4 presumably rescales a [-1, 1] value
    # range to [0, 1] -- confirm.
    square_err = (values - results) ** 2 / 4
    return top_move_agree, square_err
def play(network, readouts, resign_threshold, verbosity=0):
    ''' Plays out a self-play match and returns the player that played it.

    Args:
        network: object whose run(position) returns a (prob, val) pair; it
            is also handed to MCTSPlayer.
        readouts: number of additional readouts to perform before each move.
        resign_threshold: resign threshold passed to MCTSPlayer. In 5% of
            games it is replaced by -1.0.
        verbosity: 0 is silent; >= 1 prints timing on every tenth move;
            >= 2 prints timing on every move and the final result on stderr;
            >= 3 also prints the position, the root's statistics and the
            move played.

    Returns:
        The MCTSPlayer, after set_result has been called for either a
        resignation or a finished game.
    '''
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if (verbosity >= 3):
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            # The side to move resigns, so the result is the opposite sign
            # of to_play.
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        if (verbosity >= 2) or (verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts, dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_human_coord(coords.unflatten_coords(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q), file=sys.stderr)
        print(player.root.position,
              player.root.position.score(), file=sys.stderr)

    return player