def play(network, readouts, resign_threshold, verbosity=0):
    """Play out one self-play match and return the player that played it.

    Args:
        network: model handed to MCTSPlayer; its ``run(position)`` is called
            once here and must return a ``(prob, val)`` pair.
        readouts: number of *additional* tree-search readouts to perform
            before each move is picked.
        resign_threshold: forwarded to MCTSPlayer; overwritten with -0.9999
            in roughly 5% of games.
        verbosity: 0 prints nothing; >= 1 prints Q and timing whenever the
            position's move counter ``n`` satisfies ``n % 10 == 9``; >= 2
            prints them every move; >= 3 additionally prints the root
            position, its description and the move just played.

    Returns:
        The MCTSPlayer instance once the game has ended (by resignation or
        because ``player.is_done()`` became true). NOTE(review): earlier
        documentation described returning the final position plus search
        probability / value tensors; this function only returns the player,
        presumably callers extract that data from it -- confirm.
    """
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -0.9999

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        # Per the original author's note, should_resign() also marks the game
        # as done when it returns True.
        # TODO: make this less side-effecty.
        if player.should_resign():
            break

        move = player.pick_move()
        player.play_move(move)
        if player.is_done():
            # TODO: actually handle the result instead of ferrying it around
            # as a property.
            player.result = player.position.result()
            break

        if (verbosity >= 2) or (verbosity >= 1 and
                                player.root.position.n % 10 == 9):
            print("Q: {}".format(player.root.Q))
            dur = time.time() - start
            # Seconds per 100 readouts: per-readout time scaled up by 100
            # (the previous code divided by 100 instead of multiplying).
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_human_coord(
                      coords.unflatten_coords(player.root.fmove)))

        # TODO: break when i >= 2 * go.N * go.N (where is this being done
        # now??...)

    return player
def play(network, readouts, resign_threshold, verbosity=0):
    """Play out one self-play match and return the player that played it.

    Args:
        network: model handed to MCTSPlayer; its ``run(position)`` is called
            once here and must return a ``(prob, val)`` pair.
        readouts: number of *additional* tree-search readouts to perform
            before each move is picked.
        resign_threshold: forwarded to MCTSPlayer; overwritten with -1.0 in
            roughly 5% of games.
        verbosity: 0 prints nothing; >= 1 prints Q and timing whenever the
            position's move counter ``n`` satisfies ``n % 10 == 9``; >= 2
            prints them every move and a final summary to stderr; >= 3
            additionally prints the root position, its description and the
            move just played.

    Returns:
        The MCTSPlayer instance, after ``set_result`` has been called on it
        (either for a resignation or for a finished game). NOTE(review):
        earlier documentation described returning the final position plus
        search probability / value tensors; this function only returns the
        player, presumably callers extract that data from it -- confirm.
    """
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            # The side to move resigns, so the result is credited to the
            # opposite colour (-1 * to_play).
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break

        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(),
                              was_resign=False)
            break

        if (verbosity >= 2) or (verbosity >= 1 and
                                player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            # Third field is seconds per 100 readouts.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_human_coord(
                      coords.unflatten_coords(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player
def play(board_size, network, readouts, resign_threshold,
         simultaneous_leaves, verbosity=0):
    """Plays out a self-play match.

    Args:
        board_size: the go board size
        network: the DualNet model; its ``run(position)`` is called once here
            and must return a ``(prob, val)`` pair
        readouts: the number of additional MCTS readouts before each move
        resign_threshold: the threshold to resign at in the match; replaced
            by -1.0 in roughly 5% of games
        simultaneous_leaves: the number of simultaneous leaves in MCTS
            (passed to MCTSPlayer as ``num_parallel``)
        verbosity: the verbosity of the self-play match. >= 1 prints Q and
            timing when the position's move counter ``n`` satisfies
            ``n % 10 == 9``; >= 2 prints them every move plus a final
            summary on stderr; >= 3 also prints the root position, its
            description and each move played.

    Returns:
        The MCTSPlayer instance, after ``set_result`` has been called on it.
        NOTE(review): earlier documentation listed the final position and
        search-probability / value tensors as return values; only the player
        is returned here, presumably callers read that data from it --
        confirm.
    """
    player = MCTSPlayer(board_size, network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=simultaneous_leaves)

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            # The side to move resigns, so the result is credited to the
            # opposite colour (-1 * to_play).
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break

        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(),
                              was_resign=False)
            break

        if (verbosity >= 2) or (
                verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            # Keep the format string on a single line: a double-quoted
            # literal cannot contain a raw newline.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur))
        if verbosity >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player
def play(board_size, network, readouts, resign_threshold,
         simultaneous_leaves, verbosity=0):
    """Plays out a self-play match.

    Args:
        board_size: the go board size
        network: the DualNet model; ``network.run(position)`` is invoked once
            on the first selected leaf and must yield ``(prob, val)``
        readouts: how many extra MCTS readouts to run before every move
        resign_threshold: the threshold to resign at in the match; set to
            -1.0 instead in roughly 5% of games
        simultaneous_leaves: the number of simultaneous leaves in MCTS,
            forwarded to MCTSPlayer as ``num_parallel``
        verbosity: the verbosity of the self-play match. At >= 1, Q and
            timing are printed when ``player.root.position.n % 10 == 9``;
            at >= 2 they are printed after every move and a final summary
            goes to stderr; at >= 3 the root position, its description and
            each played move are printed as well.

    Returns:
        The MCTSPlayer used for the game, with its result already recorded
        through ``set_result``. NOTE(review): the previous docstring listed
        the final position and two tensors as return values, which does not
        match the code; presumably they are read off the player -- confirm.
    """
    player = MCTSPlayer(
        board_size,
        network,
        resign_threshold=resign_threshold,
        verbosity=verbosity,
        num_parallel=simultaneous_leaves,
    )

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            # Resignation by the side to move: record the opposite colour
            # (-1 * to_play) as the result.
            player.set_result(
                -1 * player.root.position.to_play, was_resign=True)
            break

        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(
                player.root.position.result(), was_resign=False)
            break

        if (verbosity >= 2) or (verbosity >= 1 and
                                player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            # The format string must stay on one physical line; splitting a
            # double-quoted literal across lines is a syntax error.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur))
        if verbosity >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player