def predict_move(filename, network):
    """Replay an SGF file and predict the first recorded move with MCTS.

    Parses the game in `filename`, then for the first position that has a
    recorded next move, runs MCTS readouts from that position and compares
    the searched move against the move actually played.

    Args:
        filename: path to an SGF file to replay.
        network: a policy/value network usable by MCTSPlayer.

    Returns:
        (predicted_move, actual_move, predicted_move == actual_move) for the
        first position in the replay that has a next move, or None if no
        position has one.

    Note: dead commented-out experiment code, debug prints of the raw SGF
    text, and the unreachable accuracy print after the in-loop `return`
    were removed; behavior for the returned value is unchanged.
    """
    replay = []
    with open(filename) as f:
        text = f.read()
    for position_w_context in sgf_wrapper.replay_sgf(text):
        replay.append(position_w_context)

    player = MCTSPlayer(network, verbosity=0, two_player_mode=True,
                        num_parallel=4)
    readouts = 361 * 10

    for position_w_context in replay:
        if position_w_context.next_move is None:
            continue
        player.initialize_game(position_w_context.position)
        # Do "readouts additional readouts" on top of whatever the tree
        # already holds, rather than "up to readouts total".
        current_readouts = player.root.N
        while player.root.N < current_readouts + readouts:
            player.tree_search()
        move = player.pick_move()
        player.play_move(move)
        print(player.root.position)
        print(move, position_w_context.next_move)
        # Only the first predictable position is evaluated (matches the
        # original control flow, which returned inside the loop).
        return (move, position_w_context.next_move,
                move == position_w_context.next_move)
def predict_move(filename, network, tries_per_move=1, readouts=1000):
    """Predict every move of an SGF replay and score per-move accuracy.

    The parsed replay is memoized in the module-level REPLAY_CACHE keyed by
    filename, so repeated benchmarks of the same SGF skip re-parsing.

    Args:
        filename: path to an SGF file to replay.
        network: a policy/value network usable by MCTSPlayer.
        tries_per_move: number of independent predictions per position.
        readouts: MCTS readouts per prediction (passed to predict_position).

    Returns:
        List of per-move accuracies (fraction of tries matching the recorded
        move), in game order. Raises ZeroDivisionError if the replay has no
        positions with a next move (unchanged from the original).
    """
    if filename not in REPLAY_CACHE:
        replay = []
        with open(filename) as f:
            text = f.read()
        for position_w_context in sgf_wrapper.replay_sgf(text):
            replay.append(position_w_context)
        REPLAY_CACHE[filename] = replay
    replay = REPLAY_CACHE[filename]

    player = MCTSPlayer(
        network, verbosity=0, two_player_mode=True, num_parallel=4)

    move_ratings = []
    for position_w_context in replay:
        if position_w_context.next_move is None:
            continue
        num_correct = 0
        for _ in range(tries_per_move):
            move, correct_move, is_correct = predict_position(
                position_w_context, player, readouts=readouts)
            if is_correct:
                num_correct += 1
        move_ratings.append(num_correct * 1.0 / tries_per_move)

    print('RATING: ', sum(move_ratings) / len(move_ratings))
    return move_ratings
def load_player(model_path):
    """Load network weights from model_path and wrap them in an MCTSPlayer."""
    message = "Loading weights from %s ... " % model_path
    print(message)
    with timer(message):
        net = dual_net.DualNetwork(model_path)
        net.name = os.path.basename(model_path)
    return MCTSPlayer(net, verbosity=2)
def predict_move(filename, network):
    """Replay an SGF file and predict the first recorded move with MCTS.

    Parses the game in `filename`, then for the first position that has a
    recorded next move, runs MCTS readouts from that position and compares
    the searched move against the move actually played.

    Args:
        filename: path to an SGF file to replay.
        network: a policy/value network usable by MCTSPlayer.

    Returns:
        (predicted_move, actual_move, predicted_move == actual_move) for the
        first position in the replay that has a next move, or None if no
        position has one.

    Note: dead commented-out experiment code, debug prints of the raw SGF
    text, and the unreachable accuracy print after the in-loop `return`
    were removed; behavior for the returned value is unchanged.
    """
    replay = []
    with open(filename) as f:
        text = f.read()
    for position_w_context in sgf_wrapper.replay_sgf(text):
        replay.append(position_w_context)

    player = MCTSPlayer(
        network, verbosity=0, two_player_mode=True, num_parallel=4)
    readouts = 361 * 10

    for position_w_context in replay:
        if position_w_context.next_move is None:
            continue
        player.initialize_game(position_w_context.position)
        # Do "readouts additional readouts" on top of whatever the tree
        # already holds, rather than "up to readouts total".
        current_readouts = player.root.N
        while player.root.N < current_readouts + readouts:
            player.tree_search()
        move = player.pick_move()
        player.play_move(move)
        print(player.root.position)
        print(move, position_w_context.next_move)
        # Only the first predictable position is evaluated (matches the
        # original control flow, which returned inside the loop).
        return (move, position_w_context.next_move,
                move == position_w_context.next_move)
def play(network, readouts, resign_threshold, verbosity=0):
    ''' Plays out a self-play match, returning
    - the final position
    - the n x 362 tensor of floats representing the mcts search probabilities
    - the n-ary tensor of floats representing the original value-net estimate
    where n is the number of moves in the game'''
    player = MCTSPlayer(network, resign_threshold=resign_threshold,
                        verbosity=verbosity, num_parallel=SIMULTANEOUS_LEAVES)
    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -0.9999

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if (verbosity >= 3):
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        if (verbosity >= 2) or (
                verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {}".format(player.root.Q))
            dur = time.time() - start
            # BUG FIX: "s/100" means seconds per 100 readouts, which is
            # dur / readouts * 100 -- the original divided by 100, which
            # under-reported by a factor of 10,000.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print(
                "Played >>",
                coords.to_human_coord(
                    coords.unflatten_coords(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player
def play_match(black_net, white_net, games, readouts, sgf_dir, verbosity):
    """Plays matches between two neural nets.

    black_net: Instance of minigo.DualNetwork, a wrapper around a tensorflow
        convolutional network.
    white_net: Instance of the minigo.DualNetwork.
    games: number of games to play. We play all the games at the same time.
    sgf_dir: directory to write the sgf results.
    readouts: number of readouts to perform for each step in each game.
    """
    # For n games, we create lists of n black and n white players
    black = MCTSPlayer(black_net, verbosity=verbosity, two_player_mode=True,
                       num_parallel=SIMULTANEOUS_LEAVES)
    white = MCTSPlayer(white_net, verbosity=verbosity, two_player_mode=True,
                       num_parallel=SIMULTANEOUS_LEAVES)

    black_name = os.path.basename(black_net.save_file)
    white_name = os.path.basename(white_net.save_file)

    # One result character per completed game: 'B' or 'W'.
    winners = []
    for i in range(games):
        num_move = 0  # The move number of the current game
        black.initialize_game()
        white.initialize_game()

        while True:
            start = time.time()
            # Black moves on even move numbers, white on odd.
            active = white if num_move % 2 else black
            inactive = black if num_move % 2 else white

            # Do "readouts additional readouts" beyond what the (reused)
            # tree already holds, not "up to readouts total".
            current_readouts = active.root.N
            while active.root.N < current_readouts + readouts:
                active.tree_search()

            # print some stats on the search
            if verbosity >= 3:
                print(active.root.position)

            # First, check the roots for hopeless games.
            if active.should_resign():  # Force resign
                # Resignation scores the game for the opponent of the
                # side to play; both players record the result so
                # is_done() fires below.
                active.set_result(-1 * active.root.position.to_play,
                                  was_resign=True)
                inactive.set_result(active.root.position.to_play,
                                    was_resign=True)

            if active.is_done():
                fname = "{:d}-{:s}-vs-{:s}-{:d}.sgf".format(int(time.time()),
                                                            white_name,
                                                            black_name, i)
                if active.result_string is None:
                    # This is an exceptionally rare corner case where we don't get a winner.
                    # Our temporary solution is to just drop this game.
                    break
                winners.append(active.result_string[0])
                with open(os.path.join(sgf_dir, fname), 'w') as _file:
                    sgfstr = sgf_wrapper.make_sgf(active.position.recent,
                                                  active.result_string,
                                                  black_name=black_name,
                                                  white_name=white_name)
                    _file.write(sgfstr)
                print("Finished game", i, active.result_string)
                break

            move = active.pick_move()
            # print('DBUG Picked move: ', move, active, num_move)
            # Both players must play the move so their trees stay in sync.
            active.play_move(move)
            inactive.play_move(move)

            dur = time.time() - start
            num_move += 1

            if (verbosity > 1) or (verbosity == 1 and num_move % 10 == 9):
                # Seconds per 100 readouts for this move.
                timeper = (dur / readouts) * 100.0
                print(active.root.position)
                print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                      (num_move, readouts, timeper, dur))

    return winners
def play(network, readouts, resign_threshold, verbosity=0):
    ''' Plays out a self-play match, returning
    - the final position
    - the n x 362 tensor of floats representing the mcts search probabilities
    - the n-ary tensor of floats representing the original value-net estimate
    where n is the number of moves in the game'''
    # NOTE: the unused `global_n = 0` local was removed; no other change.
    player = MCTSPlayer(network, resign_threshold=resign_threshold,
                        verbosity=verbosity, num_parallel=SIMULTANEOUS_LEAVES)
    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if (verbosity >= 3):
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        if (verbosity >= 2) or (
                verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            # Seconds per 100 readouts for this move.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_human_coord(
                      coords.unflatten_coords(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player
def play_match(black_net, white_net, games, sgf_dir, verbosity):
    """Plays matches between two neural nets.

    black_net: Instance of minigo.DualNetwork, a wrapper around a tensorflow
        convolutional network.
    white_net: Instance of the minigo.DualNetwork.
    games: number of games to play. We play all the games at the same time.
    sgf_dir: directory to write the sgf results.
    """
    readouts = flags.FLAGS.num_readouts  # Flag defined in strategies.py

    black = MCTSPlayer(
        black_net, verbosity=verbosity, two_player_mode=True)
    white = MCTSPlayer(
        white_net, verbosity=verbosity, two_player_mode=True)

    black_name = os.path.basename(black_net.save_file)
    white_name = os.path.basename(white_net.save_file)

    for i in range(games):
        num_move = 0  # The move number of the current game
        black.initialize_game()
        white.initialize_game()

        while True:
            start = time.time()
            # Black moves on even move numbers, white on odd.
            active = white if num_move % 2 else black
            inactive = black if num_move % 2 else white

            # Do "readouts additional readouts" beyond what the (reused)
            # tree already holds, not "up to readouts total".
            current_readouts = active.root.N
            while active.root.N < current_readouts + readouts:
                active.tree_search()

            # print some stats on the search
            if verbosity >= 3:
                print(active.root.position)

            # First, check the roots for hopeless games.
            if active.should_resign():  # Force resign
                # Resignation scores the game for the opponent of the side
                # to play; both players record it so is_done() fires below.
                active.set_result(-1 * active.root.position.to_play,
                                  was_resign=True)
                inactive.set_result(active.root.position.to_play,
                                    was_resign=True)

            if active.is_done():
                fname = "{:d}-{:s}-vs-{:s}-{:d}.sgf".format(int(time.time()),
                                                            white_name,
                                                            black_name, i)
                # NOTE(review): unlike the sibling variant, result_string is
                # not checked for None before writing the SGF -- presumably
                # make_sgf tolerates it; verify.
                with gfile.GFile(os.path.join(sgf_dir, fname), 'w') as _file:
                    sgfstr = sgf_wrapper.make_sgf(active.position.recent,
                                                  active.result_string,
                                                  black_name=black_name,
                                                  white_name=white_name)
                    _file.write(sgfstr)
                print("Finished game", i, active.result_string)
                break

            move = active.pick_move()
            # Both players must play the move so their trees stay in sync.
            active.play_move(move)
            inactive.play_move(move)

            dur = time.time() - start
            num_move += 1

            if (verbosity > 1) or (verbosity == 1 and num_move % 10 == 9):
                # Seconds per 100 readouts for this move.
                timeper = (dur / readouts) * 100.0
                print(active.root.position)
                print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                      (num_move, readouts, timeper, dur))
def play(board_size, network, readouts, resign_threshold, simultaneous_leaves,
         verbosity=0):
    """Plays out a self-play match.

    Args:
        board_size: the go board size
        network: the DualNet model
        readouts: the number of readouts in MCTS
        resign_threshold: the threshold to resign at in the match
        simultaneous_leaves: the number of simultaneous leaves in MCTS
        verbosity: the verbosity of the self-play match

    Returns:
        the final position
        the n x 362 tensor of floats representing the mcts search probabilities
        the n-ary tensor of floats representing the original value-net estimate
        where n is the number of moves in the game.
    """
    player = MCTSPlayer(board_size, network, resign_threshold=resign_threshold,
                        verbosity=verbosity, num_parallel=simultaneous_leaves)
    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        # Resign check before picking the move: a hopeless root ends the
        # game immediately.
        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        if (verbosity >= 2) or (
                verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            # Seconds per 100 readouts for this move.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur))
        if verbosity >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player
def play_match(black_net, white_net, games, readouts, sgf_dir, verbosity):
    """Plays matches between two neural nets.

    black_net: Instance of minigo.DualNetwork, a wrapper around a tensorflow
        convolutional network.
    white_net: Instance of the minigo.DualNetwork.
    games: number of games to play. We play all the games at the same time.
    sgf_dir: directory to write the sgf results.
    readouts: number of readouts to perform for each step in each game.
    """
    # For n games, we create lists of n black and n white players
    black = MCTSPlayer(black_net, verbosity=verbosity, two_player_mode=True,
                       num_parallel=SIMULTANEOUS_LEAVES)
    white = MCTSPlayer(white_net, verbosity=verbosity, two_player_mode=True,
                       num_parallel=SIMULTANEOUS_LEAVES)

    black_name = os.path.basename(black_net.save_file)
    white_name = os.path.basename(white_net.save_file)

    # One result character per completed game: 'B' or 'W'.
    winners = []
    for i in range(games):
        num_move = 0  # The move number of the current game
        black.initialize_game()
        white.initialize_game()

        while True:
            start = time.time()
            # Black moves on even move numbers, white on odd.
            active = white if num_move % 2 else black
            inactive = black if num_move % 2 else white

            # Do "readouts additional readouts" beyond what the (reused)
            # tree already holds, not "up to readouts total".
            current_readouts = active.root.N
            while active.root.N < current_readouts + readouts:
                active.tree_search()

            # print some stats on the search
            if verbosity >= 3:
                print(active.root.position)

            # First, check the roots for hopeless games.
            if active.should_resign():  # Force resign
                # Resignation scores the game for the opponent of the side
                # to play; both players record it so is_done() fires below.
                active.set_result(-1 * active.root.position.to_play,
                                  was_resign=True)
                inactive.set_result(active.root.position.to_play,
                                    was_resign=True)

            if active.is_done():
                fname = "{:d}-{:s}-vs-{:s}-{:d}.sgf".format(
                    int(time.time()), white_name, black_name, i)
                if active.result_string is None:
                    # This is an exceptionally rare corner case where we don't get a winner.
                    # Our temporary solution is to just drop this game.
                    break
                winners.append(active.result_string[0])
                with open(os.path.join(sgf_dir, fname), 'w') as _file:
                    sgfstr = sgf_wrapper.make_sgf(active.position.recent,
                                                  active.result_string,
                                                  black_name=black_name,
                                                  white_name=white_name)
                    _file.write(sgfstr)
                print("Finished game", i, active.result_string)
                break

            move = active.pick_move()
            # print('DBUG Picked move: ', move, active, num_move)
            # Both players must play the move so their trees stay in sync.
            active.play_move(move)
            inactive.play_move(move)

            dur = time.time() - start
            num_move += 1

            if (verbosity > 1) or (verbosity == 1 and num_move % 10 == 9):
                # Seconds per 100 readouts for this move.
                timeper = (dur / readouts) * 100.0
                print(active.root.position)
                print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                      (num_move, readouts, timeper, dur))

    return winners
def play(board_size, network, readouts, resign_threshold, simultaneous_leaves,
         verbosity=0):
    """Plays out a self-play match.

    Args:
        board_size: the go board size
        network: the DualNet model
        readouts: the number of readouts in MCTS
        resign_threshold: the threshold to resign at in the match
        simultaneous_leaves: the number of simultaneous leaves in MCTS
        verbosity: the verbosity of the self-play match

    Returns:
        the final position
        the n x 362 tensor of floats representing the mcts search probabilities
        the n-ary tensor of floats representing the original value-net estimate
        where n is the number of moves in the game.
    """
    player = MCTSPlayer(board_size, network, resign_threshold=resign_threshold,
                        verbosity=verbosity, num_parallel=simultaneous_leaves)
    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        # Resign check before picking the move: a hopeless root ends the
        # game immediately.
        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(), was_resign=False)
            break

        if (verbosity >= 2) or (
                verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            # Seconds per 100 readouts for this move.
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur))
        if verbosity >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player
def play_match(black_net, white_net, games, readouts, verbosity):
    """Play `games` simultaneous matches between two networks.

    All games advance in lockstep: each iteration of the main loop does
    `readouts` batched MCTS readouts for the side to move in every active
    game, then plays one move in every active game.

    Args:
        black_net: network playing black (must provide run_many).
        white_net: network playing white (must provide run_many).
        games: number of simultaneous games.
        readouts: MCTS readouts per move per game.
        verbosity: logging verbosity.

    Returns:
        List of finished (black_player, white_player) pairs.

    Fix: the backprop step used a list comprehension purely for its side
    effects; it is now a plain for loop (same calls, same order).
    """
    black_players = [
        MCTSPlayer(black_net, verbosity=verbosity, two_player_mode=True)
        for _ in range(games)
    ]
    white_players = [
        MCTSPlayer(white_net, verbosity=verbosity, two_player_mode=True)
        for _ in range(games)
    ]
    player_pairs = list(zip(black_players, white_players))
    done_pairs = []
    global_n = 0  # Total moves played so far; parity selects the side to move.

    for p1, p2 in player_pairs:
        p1.initialize_game()
        p2.initialize_game()

    while player_pairs:
        start = time.time()
        for _ in range(readouts):
            # One leaf per active game, all evaluated in a single batch.
            leaves = [pair[global_n % 2].root.select_leaf()
                      for pair in player_pairs]
            probs, vals = (black_net, white_net)[global_n % 2].run_many(
                [leaf.position for leaf in leaves])
            for pair, leaf, prob, val in zip(player_pairs, leaves,
                                             probs, vals):
                leaf.incorporate_results(prob, val,
                                         up_to=pair[global_n % 2].root)

        # print some stats on the search
        if (verbosity >= 3):
            print(player_pairs[0][0].root.position)

        for black, white in player_pairs:
            active = white if global_n % 2 else black
            inactive = black if global_n % 2 else white
            # First, check the roots for hopeless games.
            if active.should_resign():  # Force resign
                # NOTE(review): no result is recorded and the pair is not
                # marked done here -- presumably resolved elsewhere; verify.
                continue
            move = active.pick_move()
            active.play_move(move)
            inactive.play_move(move)

        dur = time.time() - start
        global_n += 1

        if (verbosity > 1) or (verbosity == 1 and global_n % 10 == 9):
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                  (global_n, readouts * len(player_pairs),
                   dur / (readouts * len(player_pairs) / 100.0), dur))

        # Retire finished pairs; the loop ends when none remain.
        done_pairs.extend(
            [p for p in player_pairs if p[0].is_done() or p[1].is_done()])
        player_pairs = [
            p for p in player_pairs
            if not (p[0].is_done() or p[1].is_done())
        ]

    return done_pairs
def play_match(params, black_net, white_net, games, readouts, sgf_dir,
               verbosity):
    """Plays matches between two neural nets.

    One net that wins by a margin of 55% will be the winner.

    Args:
        params: An object of hyperparameters.
        black_net: Instance of the DualNetRunner class to play as black.
        white_net: Instance of the DualNetRunner class to play as white.
        games: Number of games to play. We play all the games at the same time.
        readouts: Number of readouts to perform for each step in each game.
        sgf_dir: Directory to write the sgf results.
        verbosity: Verbosity to show evaluation process.

    Returns:
        'B' is the winner is black_net, otherwise 'W'.
    """
    # For n games, we create lists of n black and n white players
    black = MCTSPlayer(params.board_size, black_net, verbosity=verbosity,
                       two_player_mode=True,
                       num_parallel=params.simultaneous_leaves)
    white = MCTSPlayer(params.board_size, white_net, verbosity=verbosity,
                       two_player_mode=True,
                       num_parallel=params.simultaneous_leaves)

    black_name = os.path.basename(black_net.save_file)
    white_name = os.path.basename(white_net.save_file)

    # Tally of decided games, used for the final margin test.
    black_win_counts = 0
    white_win_counts = 0

    for i in range(games):
        num_move = 0  # The move number of the current game
        black.initialize_game()
        white.initialize_game()

        while True:
            start = time.time()
            # Black moves on even move numbers, white on odd.
            active = white if num_move % 2 else black
            inactive = black if num_move % 2 else white

            # Do "readouts additional readouts" beyond what the (reused)
            # tree already holds, not "up to readouts total".
            current_readouts = active.root.N
            while active.root.N < current_readouts + readouts:
                active.tree_search()

            # print some stats on the search
            if verbosity >= 3:
                print(active.root.position)

            # First, check the roots for hopeless games.
            if active.should_resign():  # Force resign
                # Resignation scores the game for the opponent of the side
                # to play; both players record it so is_done() fires below.
                active.set_result(-active.root.position.to_play,
                                  was_resign=True)
                inactive.set_result(active.root.position.to_play,
                                    was_resign=True)

            if active.is_done():
                fname = '{:d}-{:s}-vs-{:s}-{:d}.sgf'.format(
                    int(time.time()), white_name, black_name, i)
                with open(os.path.join(sgf_dir, fname), 'w') as f:
                    sgfstr = sgf_wrapper.make_sgf(params.board_size,
                                                  active.position.recent,
                                                  active.result_string,
                                                  black_name=black_name,
                                                  white_name=white_name)
                    f.write(sgfstr)
                print('Finished game', i, active.result_string)
                # Result strings start with 'B' or 'W'; a None result
                # (no winner) counts for neither side.
                if active.result_string is not None:
                    if active.result_string[0] == 'B':
                        black_win_counts += 1
                    elif active.result_string[0] == 'W':
                        white_win_counts += 1
                break

            move = active.pick_move()
            # Both players must play the move so their trees stay in sync.
            active.play_move(move)
            inactive.play_move(move)

            dur = time.time() - start
            num_move += 1

            if (verbosity > 1) or (verbosity == 1 and num_move % 10 == 9):
                # Seconds per 100 readouts for this move.
                timeper = (dur / readouts) * 100.0
                print(active.root.position)
                print('{:d}: {:d} readouts, {:.3f} s/100. ({:.2f} sec)'.format(
                    num_move, readouts, timeper, dur))

    # Black wins only if its margin over white exceeds eval_win_rate * games.
    if (black_win_counts - white_win_counts) > params.eval_win_rate * games:
        return go.BLACK_NAME
    else:
        return go.WHITE_NAME
def play_match(black_net, white_net, games, readouts, verbosity):
    """Plays matches between two neural nets.

    black_net: Instance of minigo.DualNetwork, a wrapper around a tensorflow
        convolutional network.
    white_net: Instance of the minigo.DualNetwork.
    games: number of games to play. We play all the games at the same time.
    readouts: number of readouts to perform for each step in each game.
    """
    # For n games, we create lists of n black and n white players
    black_players = [
        MCTSPlayer(black_net, verbosity=verbosity, two_player_mode=True)
        for i in range(games)
    ]
    white_players = [
        MCTSPlayer(white_net, verbosity=verbosity, two_player_mode=True)
        for i in range(games)
    ]

    # Each player pair represents two players that are going to play a game.
    player_pairs = [(b, w) for b, w in zip(black_players, white_players)]
    done_pairs = []

    # The number of moves that have been played
    num_moves = 0

    for black, white in player_pairs:
        black.initialize_game()
        white.initialize_game()

    # The heart of the game-playing loop. Each iteration through the while loop
    # plays one move for each player. That means we:
    # - Do a bunch of MTCS readouts (for each active player, for each game)
    # - Play a move (for each active player, for each game)
    # - Remove any finished player-pairs
    while player_pairs:
        start = time.time()
        for _ in range(readouts):
            # One leaf per active game, all evaluated in a single batch by
            # the network whose turn it is (parity of num_moves).
            leaves = [
                pair[num_moves % 2].root.select_leaf()
                for pair in player_pairs
            ]
            probs, vals = (black_net, white_net)[num_moves % 2].run_many(
                [leaf.position for leaf in leaves])
            for pair, leaf, prob, val in zip(player_pairs, leaves,
                                             probs, vals):
                leaf.incorporate_results(prob, val,
                                         up_to=pair[num_moves % 2].root)

        # print some stats on the search
        if verbosity >= 3:
            print(player_pairs[0][0].root.position)

        for black, white in player_pairs:
            active = white if num_moves % 2 else black
            inactive = black if num_moves % 2 else white
            # First, check the roots for hopeless games.
            if active.should_resign():  # Force resign
                # NOTE(review): no result is recorded and the pair is not
                # marked done here -- presumably resolved elsewhere; verify.
                continue
            move = active.pick_move()
            # Both players must play the move so their trees stay in sync.
            active.play_move(move)
            inactive.play_move(move)

        dur = time.time() - start
        num_moves += 1

        if (verbosity > 1) or (verbosity == 1 and num_moves % 10 == 9):
            rdcnt = readouts * len(player_pairs)
            # Seconds per 100 readouts across all active games.
            timeper = dur / (readouts * len(player_pairs) / 100.0)
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" %
                  (num_moves, rdcnt, timeper, dur))

        # Retire finished pairs; the loop ends when none remain.
        done_pairs.extend(
            [p for p in player_pairs if p[0].is_done() or p[1].is_done()])
        player_pairs = [
            p for p in player_pairs if not (p[0].is_done() or p[1].is_done())
        ]

    return done_pairs