def test_flatten(self):
    self.assertEqual(coords.to_flat((0, 0)), 0)
    self.assertEqual(coords.to_flat((0, 3)), 3)
    self.assertEqual(coords.to_flat((3, 0)), 27)
    self.assertEqual(coords.from_flat(27), (3, 0))
    self.assertEqual(coords.from_flat(10), (1, 1))
    self.assertEqual(coords.from_flat(80), (8, 8))
    self.assertEqual(coords.to_flat(coords.from_flat(10)), 10)
    self.assertEqual(coords.from_flat(coords.to_flat((5, 4))), (5, 4))
def test_flatten(self):
    self.assertEqual(coords.to_flat(utils_test.BOARD_SIZE, (0, 0)), 0)
    self.assertEqual(coords.to_flat(utils_test.BOARD_SIZE, (0, 3)), 3)
    self.assertEqual(coords.to_flat(utils_test.BOARD_SIZE, (3, 0)), 27)
    self.assertEqual(coords.from_flat(utils_test.BOARD_SIZE, 27), (3, 0))
    self.assertEqual(coords.from_flat(utils_test.BOARD_SIZE, 10), (1, 1))
    self.assertEqual(coords.from_flat(utils_test.BOARD_SIZE, 80), (8, 8))
    self.assertEqual(
        coords.to_flat(utils_test.BOARD_SIZE,
                       coords.from_flat(utils_test.BOARD_SIZE, 10)), 10)
    self.assertEqual(
        coords.from_flat(utils_test.BOARD_SIZE,
                         coords.to_flat(utils_test.BOARD_SIZE, (5, 4))),
        (5, 4))
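Both test_flatten variants pin down the same flat-coordinate convention: a (row, col) tuple maps to row * N + col, with N * N reserved for pass. A minimal sketch of the two conversions, assuming a 9x9 board (illustration only, not the actual coords module):

N = 9  # assumed board size for this sketch

def to_flat(coord):
    # None encodes a pass, which gets the extra index N * N (81 on 9x9).
    if coord is None:
        return N * N
    return N * coord[0] + coord[1]

def from_flat(flat):
    if flat == N * N:
        return None
    return divmod(flat, N)  # (row, col)

assert to_flat((3, 0)) == 27
assert from_flat(10) == (1, 1)
assert from_flat(to_flat((5, 4))) == (5, 4)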
def test_proper_move_transform(self):
    # Check that the reinterpretation of 362 = 19*19 + 1 during symmetry
    # application is consistent with coords.from_flat
    move_array = np.arange(go.N ** 2 + 1)
    coord_array = np.zeros([go.N, go.N])
    for c in range(go.N ** 2):
        coord_array[coords.from_flat(c)] = c
    for s in symmetries.SYMMETRIES:
        with self.subTest(symmetry=s):
            transformed_moves = apply_p(s, move_array)
            transformed_board = apply_f(s, coord_array)
            for new_coord, old_coord in enumerate(transformed_moves[:-1]):
                self.assertEqual(
                    old_coord,
                    transformed_board[coords.from_flat(new_coord)])
def describe(self):
    sort_order = list(range(go.N * go.N + 1))
    sort_order.sort(key=lambda i: (
        self.child_N[i], self.child_action_score[i]), reverse=True)
    soft_n = self.child_N / sum(self.child_N)
    p_delta = soft_n - self.child_prior
    p_rel = p_delta / self.child_prior

    # Dump out some statistics
    output = []
    output.append("{q:.4f}\n".format(q=self.Q))
    output.append(self.most_visited_path())
    output.append(
        "move: action Q U P P-Dir N soft-N p-delta p-rel\n")
    output.append("\n".join([
        "{!s:6}: {: .3f}, {: .3f}, {:.3f}, {:.3f}, {:.3f}, {:4d} {:.4f} {: .5f} {: .2f}".format(
            coords.to_kgs(coords.from_flat(key)),
            self.child_action_score[key],
            self.child_Q[key],
            self.child_U[key],
            self.child_prior[key],
            self.original_prior[key],
            int(self.child_N[key]),
            soft_n[key],
            p_delta[key],
            p_rel[key])
        for key in sort_order][:15]))
    return ''.join(output)
def maybe_add_child(self, fcoord):
    """Adds child node for fcoord if it doesn't already exist, and returns it."""
    if fcoord not in self.children:
        new_position = self.position.play_move(
            coords.from_flat(fcoord))
        self.children[fcoord] = MCTSNode(
            new_position, fmove=fcoord, parent=self)
    return self.children[fcoord]
def test_pass(self):
    self.assertEqual(coords.from_sgf(''), None)
    self.assertEqual(coords.from_flat(utils_test.BOARD_SIZE, 81), None)
    self.assertEqual(coords.from_kgs(utils_test.BOARD_SIZE, 'pass'), None)
    self.assertEqual(coords.from_pygtp(utils_test.BOARD_SIZE, (0, 0)), None)
    self.assertEqual(coords.to_sgf(None), '')
    self.assertEqual(coords.to_flat(utils_test.BOARD_SIZE, None), 81)
    self.assertEqual(coords.to_kgs(utils_test.BOARD_SIZE, None), 'pass')
    self.assertEqual(coords.to_pygtp(utils_test.BOARD_SIZE, None), (0, 0))
def test_topleft(self):
    self.assertEqual(coords.from_sgf('ia'), (0, 8))
    self.assertEqual(coords.from_flat(utils_test.BOARD_SIZE, 8), (0, 8))
    self.assertEqual(coords.from_kgs(utils_test.BOARD_SIZE, 'J9'), (0, 8))
    self.assertEqual(coords.from_pygtp(utils_test.BOARD_SIZE, (9, 9)), (0, 8))
    self.assertEqual(coords.to_sgf((0, 8)), 'ia')
    self.assertEqual(coords.to_flat(utils_test.BOARD_SIZE, (0, 8)), 8)
    self.assertEqual(coords.to_kgs(utils_test.BOARD_SIZE, (0, 8)), 'J9')
    self.assertEqual(coords.to_pygtp(utils_test.BOARD_SIZE, (0, 8)), (9, 9))
def test_upperleft(self):
    self.assertEqual(coords.from_sgf('aa'), (0, 0))
    self.assertEqual(coords.from_flat(utils_test.BOARD_SIZE, 0), (0, 0))
    self.assertEqual(coords.from_kgs(utils_test.BOARD_SIZE, 'A9'), (0, 0))
    self.assertEqual(coords.from_pygtp(utils_test.BOARD_SIZE, (1, 9)), (0, 0))
    self.assertEqual(coords.to_sgf((0, 0)), 'aa')
    self.assertEqual(coords.to_flat(utils_test.BOARD_SIZE, (0, 0)), 0)
    self.assertEqual(coords.to_kgs(utils_test.BOARD_SIZE, (0, 0)), 'A9')
    self.assertEqual(coords.to_pygtp(utils_test.BOARD_SIZE, (0, 0)), (1, 9))
def mvp_gg(self):
    """Returns most visited path in go-gui VAR format e.g. 'b r3 w c17...'"""
    node = self
    output = []
    while node.children and max(node.child_N) > 1:
        next_kid = np.argmax(node.child_N)
        node = node.children[next_kid]
        output.append("%s" % coords.to_kgs(
            coords.from_flat(node.fmove)))
    return ' '.join(output)
def test_pass(self):
    self.assertEqual(coords.from_sgf(''), None)
    self.assertEqual(coords.from_flat(81), None)
    self.assertEqual(coords.from_kgs('pass'), None)
    self.assertEqual(coords.from_pygtp((0, 0)), None)
    self.assertEqual(coords.to_sgf(None), '')
    self.assertEqual(coords.to_flat(None), 81)
    self.assertEqual(coords.to_kgs(None), 'pass')
    self.assertEqual(coords.to_pygtp(None), (0, 0))
def test_topleft(self):
    self.assertEqual(coords.from_sgf('ia'), (0, 8))
    self.assertEqual(coords.from_flat(8), (0, 8))
    self.assertEqual(coords.from_kgs('J9'), (0, 8))
    self.assertEqual(coords.from_pygtp((9, 9)), (0, 8))
    self.assertEqual(coords.to_sgf((0, 8)), 'ia')
    self.assertEqual(coords.to_flat((0, 8)), 8)
    self.assertEqual(coords.to_kgs((0, 8)), 'J9')
    self.assertEqual(coords.to_pygtp((0, 8)), (9, 9))
def test_upperleft(self):
    self.assertEqual(coords.from_sgf('aa'), (0, 0))
    self.assertEqual(coords.from_flat(0), (0, 0))
    self.assertEqual(coords.from_kgs('A9'), (0, 0))
    self.assertEqual(coords.from_pygtp((1, 9)), (0, 0))
    self.assertEqual(coords.to_sgf((0, 0)), 'aa')
    self.assertEqual(coords.to_flat((0, 0)), 0)
    self.assertEqual(coords.to_kgs((0, 0)), 'A9')
    self.assertEqual(coords.to_pygtp((0, 0)), (1, 9))
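These tests fix the correspondence between four coordinate systems on a 9x9 board: internal (row, col) with origin at the top left, SGF ('ia'), KGS/GTP ('J9', where column letters skip 'I' and rows count up from the bottom), and pygtp (col, row) 1-indexed from the bottom left. A hedged sketch of the KGS conversion consistent with the assertions above (illustration only, not the actual coords implementation):

N = 9
_KGS_COLUMNS = 'ABCDEFGHJKLMNOPQRST'  # note: no 'I'

def to_kgs(coord):
    if coord is None:
        return 'pass'
    row, col = coord
    # Column letter skips 'I'; row numbers count up from the bottom edge.
    return '{}{}'.format(_KGS_COLUMNS[col], N - row)

assert to_kgs((0, 0)) == 'A9'
assert to_kgs((0, 8)) == 'J9'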
def test_legal_moves(self):
    board = test_utils.load_board('''
        .O.O.XOX.
        O..OOOOOX
        ......O.O
        OO.....OX
        XO.....X.
        .O.......
        OX.....OO
        XX...OOOX
        .....O.X.
    ''')
    position = Position(board=board, to_play=BLACK)
    illegal_moves = coords_from_kgs_set('A9 E9 J9')
    legal_moves = coords_from_kgs_set('A4 G1 J1 H7') | {None}
    for move in illegal_moves:
        with self.subTest(type='illegal', move=move):
            self.assertFalse(position.is_move_legal(move))
    for move in legal_moves:
        with self.subTest(type='legal', move=move):
            self.assertTrue(position.is_move_legal(move))
    # check that the bulk legal test agrees with move-by-move illegal test.
    bulk_legality = position.all_legal_moves()
    for i, bulk_legal in enumerate(bulk_legality):
        with self.subTest(type='bulk', move=coords.from_flat(i)):
            self.assertEqual(
                bulk_legal, position.is_move_legal(coords.from_flat(i)))
    # flip the colors and check that everything is still (il)legal
    position = Position(board=-board, to_play=WHITE)
    for move in illegal_moves:
        with self.subTest(type='illegal', move=move):
            self.assertFalse(position.is_move_legal(move))
    for move in legal_moves:
        with self.subTest(type='legal', move=move):
            self.assertTrue(position.is_move_legal(move))
    bulk_legality = position.all_legal_moves()
    for i, bulk_legal in enumerate(bulk_legality):
        with self.subTest(type='bulk', move=coords.from_flat(i)):
            self.assertEqual(
                bulk_legal, position.is_move_legal(coords.from_flat(i)))
def most_visited_path(self): node = self output = [] while node.children: next_kid = np.argmax(node.child_N) node = node.children.get(next_kid) if node is None: output.append("GAME END") break output.append("%s (%d) ==> " % (coords.to_kgs( coords.from_flat(node.fmove)), node.N)) output.append("Q: {:.5f}\n".format(node.Q)) return ''.join(output)
def pick_move(self):
    '''Picks a move to play, based on MCTS readout statistics.

    Highest N is most robust indicator. In the early stage of the game, pick
    a move weighted by visit count; later on, pick the absolute max.'''
    if self.root.position.n >= self.temp_threshold:
        fcoord = np.argmax(self.root.child_N)
    else:
        cdf = self.root.child_N.cumsum()
        cdf /= cdf[-2]  # Prevents passing via softpick.
        selection = random.random()
        fcoord = cdf.searchsorted(selection)
        assert self.root.child_N[fcoord] != 0
    return coords.from_flat(fcoord)
def apply(action, history):
    """Apply the action as the next move of given history.

    action: legal move, given as flat coordinates.
    history: history of the game so far.
    """
    board = history[-1].copy()
    to_play = -1 if len(history) % 2 == 0 else 1
    # if not a pass (pass is encoded as the flat index N * N == board.size)
    if action != board.size:
        p = go.Position(board=board, to_play=to_play)
        p.play_move(coords.from_flat(action), mutate=True)
        if p.ko is not None:
            board[p.ko] = 4
    history.append(board)
def pick_move(self):
    '''Picks a move to play, based on MCTS readout statistics.

    Highest N is most robust indicator. In the early stage of the game, pick
    a move weighted by visit count; later on, pick the absolute max.'''
    if self.root.position.n > self.temp_threshold:
        fcoord = np.argmax(self.root.child_N)
    else:
        cdf = self.root.child_N.cumsum()
        cdf /= cdf[-1]
        selection = random.random()
        fcoord = cdf.searchsorted(selection)
        assert self.root.child_N[fcoord] != 0
    return coords.from_flat(fcoord)
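The cumsum/searchsorted pair in both pick_move variants samples a move index with probability proportional to its visit count; the first variant normalizes by cdf[-2] rather than cdf[-1], which makes the final (pass) entry unreachable. A standalone sketch with hypothetical visit counts:

import random
import numpy as np

child_N = np.array([10., 0., 30., 50., 10.])  # hypothetical visit counts
cdf = child_N.cumsum()
cdf /= cdf[-1]                        # normalize so the last entry is 1.0
selection = random.random()           # uniform draw in [0, 1)
fcoord = cdf.searchsorted(selection)  # first index whose cumulative mass covers the draw
assert child_N[fcoord] != 0           # zero-count moves add no mass, so they are never picked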
def most_visited_path(self):
    node = self
    output = []
    while node.children:
        next_kid = np.argmax(node.child_N)
        node = node.children.get(next_kid)
        if node is None:
            output.append('GAME END')
            break
        output.append('{} ({}) ==> '.format(
            coords.to_kgs(self.board_size,
                          coords.from_flat(self.board_size, node.fmove)),
            node.N))
    output.append('Q: {:.5f}\n'.format(node.Q))
    return ''.join(output)
def extract_move_data(root_node, worker_id, completed_time, board_size):
    current_node = root_node.next
    move_data = []
    move_num = 1
    while current_node is not None:
        props = current_node.properties
        if 'B' in props:
            to_play = 1
            move_played = props['B'][0]
        elif 'W' in props:
            to_play = -1
            move_played = props['W'][0]
        else:
            import pdb
            pdb.set_trace()
        move_played = coords.to_flat(coords.from_sgf(move_played))
        post_Q, debug_rows = parse_comment_node(props['C'][0])

        policy_prior = [0] * (board_size * board_size + 1)
        policy_prior_orig = policy_prior[:]
        mcts_visit_counts = policy_prior[:]
        mcts_visit_counts_norm = policy_prior[:]
        for debug_row in debug_rows:
            move = debug_row.move
            policy_prior[move] = debug_row.prior
            policy_prior_orig[move] = debug_row.orig_prior
            mcts_visit_counts[move] = debug_row.N
            mcts_visit_counts_norm[move] = debug_row.soft_N

        move_data.append({
            'worker_id': worker_id,
            'completed_time': completed_time,
            'move_num': move_num,
            'turn_to_play': to_play,
            'move': move_played,
            'move_kgs': coords.to_kgs(coords.from_flat(move_played)),
            'prior_Q': None,
            'post_Q': post_Q,
            'policy_prior': policy_prior,
            'policy_prior_orig': policy_prior_orig,
            'mcts_visit_counts': mcts_visit_counts,
            'mcts_visit_counts_norm': mcts_visit_counts_norm,
        })
        move_num += 1
        current_node = current_node.next
    return move_data
def run(self, model, position):
    # assert position is of type Position from go.py
    root = Node(0, position.to_play)
    boards, playerCaps, opponentCaps = gamesToData([[position, 1]])
    action_probs = model.callPol(boards, playerCaps, opponentCaps)[0]
    value = model.callVal(boards, playerCaps, opponentCaps)[0]
    valid_moves = position.all_legal_moves()
    action_probs = action_probs * valid_moves  # mask invalid moves
    action_probs /= np.sum(action_probs)
    root.expand(position, action_probs)

    for _ in range(self.number_of_sim):
        node = root
        search_path = [node]
        while node.expanded():
            action, node = node.select_child()
            search_path.append(node)

        parent = search_path[-2]
        position = parent.position
        next_position = position.play_move(coords.from_flat(action))
        if not next_position.is_game_over():
            new_boards, new_playerCaps, new_opponentCaps = gamesToData(
                [[next_position, 1]])
            action_probs = model.callPol(
                new_boards, new_playerCaps, new_opponentCaps)[0]
            value = model.callVal(
                new_boards, new_playerCaps, new_opponentCaps)[0]
            valid_moves = next_position.all_legal_moves()
            action_probs = action_probs * valid_moves  # mask invalid moves
            action_probs /= np.sum(action_probs)
            node.expand(next_position, action_probs)
        else:
            if next_position.to_play == 1:
                value = next_position.result()
            else:
                value = next_position.result() * -1
        self.backpropagate(search_path, value, next_position.to_play)
    return root
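The run loop above defers value bookkeeping to self.backpropagate. A minimal sketch of what that step typically looks like in this AlphaZero-style setup, assuming the Node keeps to_play, visit_count, and value_sum fields (hypothetical names, not necessarily this Node's API):

def backpropagate(self, search_path, value, to_play):
    # Walk back up the search path, crediting each node with the leaf value
    # from the perspective of the player to move at that node.
    for node in reversed(search_path):
        node.value_sum += value if node.to_play == to_play else -value
        node.visit_count += 1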
def _from_flat(flat_coords):
    return coords.from_flat(utils_test.BOARD_SIZE, flat_coords)
def heatmap(self, sort_order, node, prop):
    return "\n".join(["{!s:6} {}".format(
        coords.to_kgs(coords.from_flat(key)),
        node.__dict__.get(prop)[key])
        for key in sort_order
        if node.child_N[key] > 0][:20])
def extract_move_data(root_node, worker_id, completed_time, board_size):
    current_node = root_node.next
    move_data = []
    move_num = 1
    while current_node is not None:
        props = current_node.properties
        if 'B' in props:
            to_play = 1
            move_played = props['B'][0]
        elif 'W' in props:
            to_play = -1
            move_played = props['W'][0]
        else:
            import pdb
            pdb.set_trace()
        move_played = coords.to_flat(coords.from_sgf(move_played))
        post_Q, debug_rows = parse_comment_node(props['C'][0])

        def get_row_data(debug_row):
            column_names = ["prior", "orig_prior", "N", "soft_N"]
            return [getattr(debug_row, field) for field in column_names]

        if FLAGS.only_top_move:
            assert len(debug_rows) <= 1
            row_data = list(map(get_row_data, debug_rows))
        else:
            row_data = [[0] * 4 for _ in range(board_size * board_size + 1)]
            for debug_row in debug_rows:
                move = debug_row.move
                row_data[move] = get_row_data(debug_row)

        policy_prior, policy_prior_orig, mcts_visits, mcts_visits_norm = \
            zip(*row_data)

        move_data.append({
            'worker_id': worker_id,
            'completed_time': completed_time,
            'move_num': move_num,
            'turn_to_play': to_play,
            'move': move_played,
            'move_kgs': coords.to_gtp(coords.from_flat(move_played)),
            'prior_Q': None,
            'post_Q': post_Q,
            'policy_prior': policy_prior,
            'policy_prior_orig': policy_prior_orig,
            'mcts_visit_counts': mcts_visits,
            'mcts_visit_counts_norm': mcts_visits_norm,
        })
        move_num += 1
        current_node = current_node.next
    return move_data
def _heatmap(self, sort_order, node, prop):
    return "\n".join([
        "{!s:6} {}".format(coords.to_kgs(coords.from_flat(key)),
                           node.__dict__.get(prop)[key])
        for key in sort_order
        if node.child_N[key] > 0
    ][:20])
def play_mcts(network, board=None):
    pos = Position(board=board)
    player = get_mcts_player(network, pos)
    node = player.root
    children = node.rank_children()
    soft_n = node.child_N / max(1, sum(node.child_N))
    original_moves = {}
    heatmap = np.zeros((N, N), dtype=float)
    a_b = None
    for i in children:
        if node.child_N[i] == 0:
            break
        if a_b is None:
            a_b = coords.from_flat(i)
        original_moves[coords.to_gtp(coords.from_flat(i))] = soft_n[i]
    a_b = player.pick_move()  # player.play_move(move)
    a_b_coords = a_b
    a_b = coords.to_gtp(a_b)
    print(original_moves)
    print("best action: ", a_b)
    print(node.position)
    p = original_moves[a_b]
    print(p)
    for i in range(N):
        for j in range(N):
            if board[i][j] == -1 or board[i][j] == 1:
                new_board = np.copy(board)
                new_board[i, j] = 0
                new_pos = perturb_position(pos, new_board)
                if new_pos.is_move_legal(a_b_coords):
                    player = get_mcts_player(network, new_pos)
                    node = player.root
                    print(node.position)
                    new_moves = {}
                    children = node.rank_children()
                    soft_n = node.child_N / max(1, sum(node.child_N))
                    for ch in children:
                        if node.child_N[ch] == 0:
                            break
                        new_moves[coords.to_gtp(coords.from_flat(ch))] = soft_n[ch]
                    new_a_b = player.pick_move()  # player.play_move(move)
                    new_a_b = coords.to_gtp(new_a_b)
                    # if new_a_b == 'F5':
                    print("---------------------")
                    # print("Moves: ", new_moves)
                    if a_b in new_moves:
                        new_p = new_moves[a_b]
                    else:
                        new_p = 0.
                    print("New best move", new_a_b)
                    print("p", new_p)
                    print("------------------")
                    K = cross_entropy_mcts(original_moves, new_moves, a_b)
                    if K == -1:
                        print("index", i, j)
                        heatmap[i, j] = -1.0
                        continue
                    dP = p - new_p
                    if dP > 0:
                        heatmap[i, j] = 2.0 * dP / (1. + dP * K)
                    else:
                        heatmap[i, j] = -1.0
    heatmap[heatmap == -1] = np.max(heatmap)
    heatmap[heatmap < np.max(heatmap) / 1.5] = 0
    plt.imshow(heatmap, cmap='jet')
    plt.colorbar()
    plt.show()
    return player
def play_network(network, board=None):
    '''Generates saliency maps of 3 methods given a board position'''
    pos = Position(board=board)
    original_moves = {}
    policy, V = network.run(pos)
    best_move = np.argmax(policy)
    print("Best Move is", coords.to_gtp(coords.from_flat(best_move)))
    p = np.max(policy)
    player = get_mcts_player(network, pos)
    node = player.root
    old_Q = node.child_Q[best_move]
    atariV = np.zeros([N, N], dtype=float)
    atariP = np.zeros([N, N], dtype=float)
    delQ = np.zeros([N, N], dtype=float)
    heatmap = np.zeros([N, N], dtype=float)
    for i in range(N):
        for j in range(N):
            if board[i, j] == 1 or board[i, j] == -1:
                print(i, j)
                print("---------------------")
                new_board = np.copy(board)
                new_board[i, j] = 0
                new_pos = perturb_position(pos, new_board)
                new_policy, new_V = network.run(new_pos)
                new_p = new_policy[best_move]
                player = get_mcts_player(network, pos)
                node = player.root
                # print(node.describe())
                new_Q = node.child_Q[best_move]
                atariV[i, j] = 0.5 * ((V - new_V) ** 2)
                atariP[i, j] = 0.5 * np.linalg.norm(policy - new_policy)
                dP = p - new_p
                dQ = old_Q - new_Q
                K = cross_entropy(policy, new_policy, best_move)
                if dP > 0:
                    heatmap[i, j] = 2 * dP / (1 + dP * K)
                if dQ > 0:
                    delQ[i, j] = dQ
    atariV = (atariV - np.min(atariV)) / (np.max(atariV) - np.min(atariV))
    atariP = (atariP - np.min(atariP)) / (np.max(atariP) - np.min(atariP))
    # heatmap[heatmap < np.max(heatmap)/3] = 0
    # atariV[atariV < np.max(atariV)/3] = 0
    # atariP[atariP < np.max(atariP)/3] = 0
    # delQ[delQ < np.max(delQ)/3] = 0
    frame = np.zeros((N, N, 3))
    frame = saliency_combine(atariV, frame, blur=256, channel=2)
    frame = saliency_combine(atariP, frame, blur=256, channel=0)
    plt.figure(1)
    plt.imshow(atariV, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'atariV.png')
    plt.show()
    plt.figure(2)
    plt.imshow(atariP, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'atariP.png')
    plt.show()
    plt.figure(3)
    plt.imshow(frame)
    plt.savefig(save_path + 'atari.png')
    plt.show()
    plt.figure(4)
    plt.imshow(delQ, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'deltaQ.png')
    plt.show()
    plt.figure(5)
    plt.imshow(heatmap, cmap='Reds')
    plt.colorbar()
    plt.savefig(save_path + 'entropy.png')
    plt.show()
def eval_player(player, positions, moves, results):
    probs, values = batch_run_many(player, positions)
    policy_moves = [coords.from_flat(c) for c in np.argmax(probs, axis=1)]
    top_move_agree = [moves[idx] == policy_moves[idx]
                      for idx in range(len(moves))]
    square_err = (values - results) ** 2 / 4
    return top_move_agree, square_err
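Since both values and results lie in [-1, 1], the division by 4 rescales the squared error into [0, 1]. A quick worked example with made-up numbers:

import numpy as np

values = np.array([0.5, -1.0])   # hypothetical value-net outputs
results = np.array([1.0, -1.0])  # actual game results
square_err = (values - results) ** 2 / 4
# -> [0.0625, 0.0]; a maximally wrong prediction (+1 vs. -1) scores 1.0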
def play(network, readouts, resign_threshold, verbosity=0):
    '''Plays out a self-play match, returning
    - the final position
    - the n x 362 tensor of floats representing the mcts search probabilities
    - the n-ary tensor of floats representing the original value-net estimate
    where n is the number of moves in the game'''
    player = MCTSPlayer(network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=SIMULTANEOUS_LEAVES)
    global_n = 0

    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    # print("prob", prob)
    # print("val", val)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(),
                              was_resign=False)
            break

        if (verbosity >= 2) or (
                verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur), flush=True)
        if verbosity >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player
def play(network):
    '''Plays out a self-play match, returning a MCTSPlayer object containing:
    - the final position
    - the n x 362 tensor of floats representing the mcts search probabilities
    - the n-ary tensor of floats representing the original value-net estimate
    where n is the number of moves in the game'''
    readouts = FLAGS.num_readouts  # defined in strategies.py

    # Disable resign in 5% of games
    if random.random() < FLAGS.resign_disable_pct:
        resign_threshold = -1.0
    else:
        resign_threshold = None

    player = MCTSPlayer(network, resign_threshold=resign_threshold)
    player.initialize_game()

    # Must run this once at the start to expand the root node.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if FLAGS.verbose >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(),
                              was_resign=False)
            break

        if (FLAGS.verbose >= 2) or (
                FLAGS.verbose >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur), flush=True)
        if FLAGS.verbose >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if FLAGS.verbose >= 2:
        utils.dbg("%s: %.3f" % (player.result_string, player.root.Q))
        utils.dbg(player.root.position, player.root.position.score())

    return player
def testAgainstRandom(model, matches):
    # Untrained models do not play at random; they have random weight
    # initializations and then always play in terms of those.
    # This function takes a (trained) model and plays it against a player
    # who makes a random move every time, for `matches` matches.
    veteran = model
    veteranWins = 0
    beginnerWins = 0
    for i in range(matches):
        # Alternate colors each match; reset both sides so exactly one of
        # them is the model.
        if i % 2 == 0:
            black, white = veteran, None
        else:
            black, white = None, veteran
        position = go.Position()
        while not position.is_game_over():
            if position.n >= 100:
                position = position.pass_move()
            else:
                if position.to_play == 1:
                    if black == veteran:
                        boards, playerCaps, opponentCaps = gamesToData(
                            [[position, 1]])
                        actions = black.callPol(boards, playerCaps,
                                                opponentCaps)[0]
                        pdist = tf.nn.softmax(
                            tf.cast(actions, dtype=tf.float64))
                        legalMoves = position.all_legal_moves()
                        move = np.random.choice(np.arange(0, len(pdist)),
                                                p=pdist)
                        if legalMoves[move] == 0:
                            actions = actions * legalMoves
                            move = tf.math.argmax(actions).numpy()
                        position = position.play_move(coords.from_flat(move))
                    else:
                        position = choose_and_play_move(position)
                else:
                    if white == veteran:
                        boards, playerCaps, opponentCaps = gamesToData(
                            [[position, 1]])
                        actions = white.callPol(boards, playerCaps,
                                                opponentCaps)[0]
                        pdist = tf.nn.softmax(
                            tf.cast(actions, dtype=tf.float64))
                        legalMoves = position.all_legal_moves()
                        move = np.random.choice(np.arange(0, len(pdist)),
                                                p=pdist)
                        if legalMoves[move] == 0:
                            actions = actions * legalMoves
                            move = tf.math.argmax(actions).numpy()
                        position = position.play_move(coords.from_flat(move))
                    else:
                        position = choose_and_play_move(position)
        if black == veteran:
            if position.result() == 1:
                veteranWins += 1
            elif position.result() == -1:
                beginnerWins += 1
            else:
                print("No one wins!!")
        else:
            if position.result() == 1:
                beginnerWins += 1
            elif position.result() == -1:
                veteranWins += 1
            else:
                print("No one wins!!")
    print("The model wins " + str(veteranWins))
    print("The random wins " + str(beginnerWins))
    return veteranWins - beginnerWins
def play(board_size, network, readouts, resign_threshold, simultaneous_leaves,
         verbosity=0):
    """Plays out a self-play match.

    Args:
      board_size: the go board size
      network: the DualNet model
      readouts: the number of readouts in MCTS
      resign_threshold: the threshold to resign at in the match
      simultaneous_leaves: the number of simultaneous leaves in MCTS
      verbosity: the verbosity of the self-play match

    Returns:
      the final position
      the n x 362 tensor of floats representing the mcts search probabilities
      the n-ary tensor of floats representing the original value-net estimate
        where n is the number of moves in the game.
    """
    player = MCTSPlayer(board_size, network,
                        resign_threshold=resign_threshold,
                        verbosity=verbosity,
                        num_parallel=simultaneous_leaves)
    # Disable resign in 5% of games
    if random.random() < 0.05:
        player.resign_threshold = -1.0

    player.initialize_game()

    # Must run this once at the start, so that noise injection actually
    # affects the first move of the game.
    first_node = player.root.select_leaf()
    prob, val = network.run(first_node.position)
    first_node.incorporate_results(prob, val, first_node)

    while True:
        start = time.time()
        player.root.inject_noise()
        current_readouts = player.root.N
        # we want to do "X additional readouts", rather than "up to X readouts".
        while player.root.N < current_readouts + readouts:
            player.tree_search()

        if verbosity >= 3:
            print(player.root.position)
            print(player.root.describe())

        if player.should_resign():
            player.set_result(-1 * player.root.position.to_play,
                              was_resign=True)
            break
        move = player.pick_move()
        player.play_move(move)
        if player.root.is_done():
            player.set_result(player.root.position.result(),
                              was_resign=False)
            break

        if (verbosity >= 2) or (
                verbosity >= 1 and player.root.position.n % 10 == 9):
            print("Q: {:.5f}".format(player.root.Q))
            dur = time.time() - start
            print("%d: %d readouts, %.3f s/100. (%.2f sec)" % (
                player.root.position.n, readouts,
                dur / readouts * 100.0, dur))
        if verbosity >= 3:
            print("Played >>",
                  coords.to_kgs(coords.from_flat(player.root.fmove)))

    if verbosity >= 2:
        print("%s: %.3f" % (player.result_string, player.root.Q),
              file=sys.stderr)
        print(player.root.position, player.root.position.score(),
              file=sys.stderr)

    return player