def run_game(b1_role, mcts_role, b1_N, mcts_N, model, rand=0):
    """Play one game between a B1Node player and an MCTSNode player.

    Both search trees are advanced in lock-step with the joint move that is
    actually played. A 6x8 text board is printed before every step and is
    updated whenever a played move's GDL contains 'drop'.

    Args:
        b1_role: Roles controlled by the B1 player.
        mcts_role: Roles controlled by the MCTS player.
        b1_N: Passed to `choose_move` for the B1 player.
        mcts_N: Passed to `choose_move` for the MCTS player.
        model: Model handed to the B1 root node.
        rand: For steps with `step < rand`, `choose_move` receives True as
            its last argument.

    Returns:
        A `(b1_total, mcts_total)` tuple: the final scores summed over each
        player's roles.
    """
    print(f'b1 ({" ".join(b1_role)}), N={b1_N}')
    print(f'mcts ({" ".join(mcts_role)}), N={mcts_N}')

    b1_node = B1Node(propnet, data, model=model)
    mcts_node = MCTSNode(propnet, data)
    board = [['.'] * 8 for _ in range(6)]

    def show_board():
        # Row 0 is the bottom of the board, so print the rows top-down.
        print(*(''.join(row) for row in reversed(board)), sep='\n')

    for step in range(1000):
        show_board()
        legal = b1_node.propnet.legal_moves_dict(b1_node.data)
        randomize = step < rand
        b1_moves = choose_move(b1_node, b1_role, b1_N, legal, randomize)
        mcts_moves = choose_move(mcts_node, mcts_role, mcts_N, legal, randomize)

        # Merge both players' choices, then order them by propnet role.
        chosen = dict(zip(b1_role, b1_moves))
        chosen.update(zip(mcts_role, mcts_moves))
        moves = tuple(chosen[role] for role in propnet.roles)

        b1_node = b1_node.get_or_make_child(moves)
        mcts_node = mcts_node.get_or_make_child(moves)

        print('Moves were:')
        for move in propnet.legal:
            if move.id not in moves or move.move_gdl.strip() == 'noop':
                continue
            print(move.move_role, move.move_gdl)
            if 'drop' not in move.move_gdl:
                continue
            # The third token of the move is the 1-based column number.
            col = int(move.move_gdl.split()[2]) - 1
            # Fill the lowest free cell of that column, if there is one.
            for row in board:
                if row[col] == '.':
                    row[col] = move.move_role[0]
                    break

        if b1_node.terminal:
            show_board()
            break

    print('Results:', b1_node.scores)
    b1_total = sum(b1_node.scores[role] for role in b1_role)
    mcts_total = sum(b1_node.scores[role] for role in mcts_role)
    return (b1_total, mcts_total)
def test_add_child_idempotency(self):
    """Adding the same move twice yields an equal child and equal children."""
    root = MCTSNode(go.Position())
    first = root.maybe_add_child(17)
    # Snapshot the children mapping before the repeated call.
    children_before = copy.copy(root.children)
    second = root.maybe_add_child(17)
    self.assertEqual(first, second)
    self.assertEqual(children_before, root.children)
def play(agentBlack, agentWhite, vizualize=False):
    """Play one game between two agents and record the visited states.

    Args:
        agentBlack: Agent that moves on even turns (it moves first).
        agentWhite: Agent that moves on odd turns.
        vizualize: When true, print the board after every move.

    Returns:
        `(experience, z)`, where `experience` is a list of
        `[state, mcts_policy]` pairs (the state before each move) and `z` is
        the `endResult` of the final state.
    """
    state = game.State.init()
    agents = [agentBlack, agentWhite]

    # When one agent plays both sides, both slots hold the same node object.
    if agentBlack is agentWhite:
        mctsNodes = [MCTSNode(state)] * 2
    else:
        mctsNodes = [MCTSNode(state), MCTSNode(state)]

    experience = []
    turn = 0
    while True:
        mover = agents[turn]
        node = mctsNodes[turn]
        action = mover.get_action(node)
        mcts_policy = mover.get_mcts_policy(node)

        # Advance both slots by the move actually played.
        mctsNodes = [tree.next(action) for tree in mctsNodes]

        experience.append([state, mcts_policy])
        state = game.transition(state, action)
        if vizualize:
            print_board(state)
        if state.isEnd:
            break
        turn = 1 - turn

    return experience, state.endResult
def test_add_child(self):
    """A newly added child is registered on the root and linked back to it."""
    size = utils_test.BOARD_SIZE
    root = MCTSNode(size, go.Position(size))
    added = root.maybe_add_child(17)
    self.assertIn(17, root.children)
    self.assertEqual(added.parent, root)
    self.assertEqual(added.fmove, 17)
def initialize_game(self, position=None):
    """Reset the per-game state and root the search tree at `position`.

    A fresh `go.Position()` is used when `position` is None.
    """
    start = go.Position() if position is None else position
    self.root = MCTSNode(start)
    self.result = 0
    # Per-move records, rebuilt from scratch for every game.
    self.comments = []
    self.searches_pi = []
    self.qs = []
def test_select_leaf(self):
    """After one expansion, select_leaf follows the move with the largest prior."""
    d9 = kgs_to_flat('D9')
    priors = np.full(go.N * go.N + 1, .02)
    priors[d9] = 0.4
    root = MCTSNode(SEND_TWO_RETURN_ONE)
    # The first selected leaf gets the priors incorporated with value 0.
    root.select_leaf().incorporate_results(priors, 0, root)

    self.assertEqual(root.position.to_play, go.WHITE)
    self.assertEqual(root.select_leaf(), root.children[d9])
def initialize_game(self, position=None):
    """Reset the per-game state and root the search tree at `position`.

    When `position` is None, an empty `go.Position` of `self.board_size` is
    used instead.
    """
    start = go.Position(self.board_size) if position is None else position
    self.root = MCTSNode(self.board_size, start)
    self.result = 0
    self.result_string = None
    # Per-move records, rebuilt from scratch for every game.
    self.comments = []
    self.searches_pi = []
    self.qs = []
def test_select_leaf(self):
    """After one expansion, select_leaf follows the move with the largest prior."""
    d9 = coords.to_flat(coords.from_kgs('D9'))
    priors = np.full(go.N * go.N + 1, .02)
    priors[d9] = 0.4
    root = MCTSNode(SEND_TWO_RETURN_ONE)
    # The first selected leaf gets the priors incorporated with value 0.
    root.select_leaf().incorporate_results(priors, 0, root)

    self.assertEqual(root.position.to_play, go.WHITE)
    self.assertEqual(root.select_leaf(), root.children[d9])
def play(self, endtime):
    """Run 500 simulations from a fresh root and return the chosen move's GDL.

    The move with the highest count in `root.move_counts[self.role]` wins;
    on ties the first one encountered is kept. `endtime` is accepted but not
    used by this implementation.
    """
    root = MCTSNode(self.propnet)
    for _ in range(500):
        simulation(root)
    root.print_node()

    top_count, top_id = -1, None
    for move_id, count in root.move_counts[self.role].items():
        if count > top_count:
            top_count, top_id = count, move_id

    move = self.propnet.id_to_move[top_id].move_gdl
    print('Made move', move)
    return move
def test_select_leaf(self):
    """After one expansion, select_leaf follows the move with the largest prior."""
    size = utils_test.BOARD_SIZE
    d9 = coords.to_flat(size, coords.from_kgs(size, 'D9'))
    priors = np.full(size * size + 1, .02)
    priors[d9] = 0.4
    root = MCTSNode(size, SEND_TWO_RETURN_ONE)
    # The first selected leaf gets the priors incorporated with value 0.
    root.select_leaf().incorporate_results(priors, 0, root)

    self.assertEqual(root.position.to_play, go.WHITE)
    self.assertEqual(root.select_leaf(), root.children[d9])
def test_do_not_explore_past_finish(self):
    """A node reached by two consecutive passes must not be expanded further."""
    priors = np.array([0.02] * (go.N * go.N + 1), dtype=np.float32)
    pass_move = coords.to_flat(None)

    root = MCTSNode(go.Position())
    root.select_leaf().incorporate_results(priors, 0, root)

    after_one_pass = root.maybe_add_child(pass_move)
    after_one_pass.incorporate_results(priors, 0, root)
    after_two_passes = after_one_pass.maybe_add_child(pass_move)

    # Incorporating results on the finished position is expected to assert.
    with self.assertRaises(AssertionError):
        after_two_passes.incorporate_results(priors, 0, root)

    # Selection should just stop exploring at the end position.
    picked = after_two_passes.select_leaf()
    self.assertEqual(picked, after_two_passes)
def test_do_not_explore_past_finish(self):
    """A node reached by two consecutive passes must not be expanded further."""
    priors = np.array([0.02] * (go.N * go.N + 1), dtype=np.float32)

    root = MCTSNode(go.Position())
    root.select_leaf().incorporate_results(priors, 0, root)

    after_one_pass = root.maybe_add_child(coords.flatten_coords(None))
    after_one_pass.incorporate_results(priors, 0, root)
    after_two_passes = after_one_pass.maybe_add_child(
        coords.flatten_coords(None))

    # Incorporating results on the finished position is expected to assert.
    with self.assertRaises(AssertionError):
        after_two_passes.incorporate_results(priors, 0, root)

    # Selection should just stop exploring at the end position.
    picked = after_two_passes.select_leaf()
    self.assertEqual(picked, after_two_passes)
def test_dont_pick_unexpanded_child(self):
    """Selecting twice before evaluation must return the same pending leaf."""
    priors = np.array([0.001] * (go.N * go.N + 1))
    # Make one move overwhelmingly likely so the search goes down that path
    # twice, even with a virtual loss applied.
    priors[17] = 0.999

    root = MCTSNode(go.Position())
    root.incorporate_results(priors, 0, root)

    first_pick = root.select_leaf()
    self.assertEqual(first_pick.fmove, 17)
    first_pick.add_virtual_loss(up_to=root)

    # The child has not been evaluated and incorporated yet, so a second
    # selection must hand back the very same node.
    second_pick = root.select_leaf()
    self.assertIs(first_pick, second_pick)
def test_action_flipping(self):
    """Black-to-play and white-to-play roots pick the same move from equal priors."""
    np.random.seed(1)
    n_moves = go.N * go.N + 1
    # Uniform priors plus a little seeded noise to break ties.
    priors = np.array([.02] * n_moves)
    priors = priors + np.random.random([n_moves]) * 0.001

    roots = {
        'black': MCTSNode(go.Position()),
        'white': MCTSNode(go.Position(to_play=go.WHITE)),
    }
    black_root, white_root = roots['black'], roots['white']
    black_root.select_leaf().incorporate_results(priors, 0, black_root)
    white_root.select_leaf().incorporate_results(priors, 0, white_root)

    # No matter who is to play, when we know nothing else, the priors
    # should be respected, and the same move should be picked.
    black_pick = black_root.select_leaf()
    white_pick = white_root.select_leaf()
    self.assertEqual(black_pick.fmove, white_pick.fmove)
    self.assertEqualNPArray(black_root.child_action_score,
                            white_root.child_action_score)
def initialize_game(self, position=None):
    """Reset the per-game state and root the search tree at `position`.

    A fresh `go.Position()` is used when `position` is None.
    """
    start = go.Position() if position is None else position
    self.root = MCTSNode(start)
    self.result = 0
    self.result_string = None
    # Per-move records, rebuilt from scratch for every game.
    self.comments = []
    self.searches_pi = []
    self.qs = []
def play_game(model_first, model_second):
    """Play two models against each other and return per-move statistics.

    Each model gets its own MCTS tree and its own config; the two swap the
    "current player" role after every move. With `eval_mcts_iterations == 0`
    the move maximises the current node's `p`, otherwise its `N` after that
    many `select()` calls.

    Returns:
        A dict of numpy arrays keyed by 'state', 'curr_p', 'curr_v',
        'curr_W', 'curr_N', 'next_p', 'next_v', 'move' and 'time'. 'state'
        additionally ends with the final state, and 'curr_v' / 'next_v' end
        with -1.
    """
    def make_evaluator(model):
        # Wrap a model as a single-state evaluator returning (value, policy).
        def evaluate(state):
            with torch.no_grad():
                v, p = model.fit_batch((np.array([state]), ), train=False)
            return v, p[0]
        return evaluate

    config_first, config_second = model_first.config, model_second.config
    eval_first = make_evaluator(model_first)
    eval_second = make_evaluator(model_second)

    set_config(config_first)
    start_state = get_start_state()
    mover = MCTSNode(start_state, evaluator=eval_first)
    waiter = MCTSNode(start_state, evaluator=eval_second)
    mover_config, waiter_config = config_first, config_second

    records = []
    for _ in RangeProgress(0, config_first.board_dim**2, desc='Moves'):
        set_config(mover_config)
        began = time()

        if mover_config.eval_mcts_iterations == 0:
            score = mover.p
        else:
            for _ in RangeProgress(0, mover_config.eval_mcts_iterations,
                                   desc='MCTS'):
                mover.select()
            score = mover.N
        move = np.unravel_index(score.argmax(), score.shape)

        records.append({
            'state': mover.state,
            'curr_p': mover.p,
            'curr_v': mover.value,
            'curr_W': mover.W,
            'curr_N': mover.N,
            'next_p': waiter.p,
            'next_v': waiter.value,
            'move': move,
            'time': time() - began,
        })

        # Advance both trees by the played move and swap who is to move.
        waiter, mover = mover.step(move), waiter.step(move)
        mover_config, waiter_config = waiter_config, mover_config
        if mover.terminal:
            break

    merged = {key: [rec[key] for rec in records] for key in records[0]}
    merged['state'].append(mover.state)
    merged['curr_v'].append(-1)
    merged['next_v'].append(-1)
    return {key: np.array(values) for key, values in merged.items()}
def test_backup_incorporate_results(self):
    """Incorporated values are backed up into N and Q along the whole path."""
    size = utils_test.BOARD_SIZE
    priors = np.array([.02] * (size * size + 1))
    root = MCTSNode(size, SEND_TWO_RETURN_ONE)
    root.select_leaf().incorporate_results(priors, 0, root)

    child = root.select_leaf()
    child.incorporate_results(priors, -1, root)  # white wins!

    # Root was visited twice: first at the root, then at this child.
    self.assertEqual(root.N, 2)
    # Root has 0 as a prior and two visits with value 0, -1.
    self.assertAlmostEqual(root.Q, -1/3)  # average of 0, 0, -1
    # The child should have one visit.
    self.assertEqual(root.child_N[child.fmove], 1)
    self.assertEqual(child.N, 1)
    # The child's value had its parent's Q (0) as a prior, so its Q should
    # now be the average of 0, -1.
    self.assertAlmostEqual(root.child_Q[child.fmove], -0.5)
    self.assertAlmostEqual(child.Q, -0.5)

    # We're assuming that select_leaf() returns a leaf like:
    #   root
    #     \
    #      child
    #        \
    #         grandchild
    # which happens in this test because root is W to play and the child
    # was a W win.
    self.assertEqual(root.position.to_play, go.WHITE)
    grandchild = root.select_leaf()
    grandchild.incorporate_results(priors, -0.2, root)  # another white semi-win
    self.assertEqual(root.N, 3)
    # average of 0, 0, -1, -0.2
    self.assertAlmostEqual(root.Q, -0.3)

    self.assertEqual(child.N, 2)
    self.assertEqual(grandchild.N, 1)
    # average of 0, -1, -0.2
    self.assertAlmostEqual(child.Q, root.child_Q[child.fmove])
    self.assertAlmostEqual(child.Q, -0.4)
    # average of -1, -0.2
    self.assertAlmostEqual(child.child_Q[grandchild.fmove], -0.6)
    self.assertAlmostEqual(grandchild.Q, -0.6)
def test_action_flipping(self):
    """Black-to-play and white-to-play roots pick the same move from equal priors."""
    np.random.seed(1)
    n_moves = go.N * go.N + 1
    # Uniform priors plus a little seeded noise to break ties.
    priors = np.array([.02] * n_moves)
    priors = priors + np.random.random([n_moves]) * 0.001

    black_root = MCTSNode(go.Position())
    white_root = MCTSNode(go.Position(to_play=go.WHITE))
    black_root.select_leaf().incorporate_results(priors, 0, black_root)
    white_root.select_leaf().incorporate_results(priors, 0, white_root)

    # No matter who is to play, when we know nothing else, the priors
    # should be respected, and the same move should be picked.
    black_pick = black_root.select_leaf()
    white_pick = white_root.select_leaf()
    self.assertEqual(black_pick.fmove, white_pick.fmove)
    self.assertEqualNPArray(
        black_root.child_action_score, white_root.child_action_score)
def test_never_select_illegal_moves(self):
    """Selection must skip an illegal move even when its prior is huge."""
    illegal_move = 1
    priors = np.array([0.02] * (go.N * go.N + 1))
    # Let's say the NN were to accidentally put a high weight on an illegal
    # move.
    priors[illegal_move] = 0.99
    root = MCTSNode(SEND_TWO_RETURN_ONE)
    root.incorporate_results(priors, 0, root)

    # And let's say the root were visited a lot of times, which pumps up the
    # action score for unvisited moves...
    root.N = 100000
    root.child_N[root.position.all_legal_moves()] = 10000

    # This should not throw an error...
    picked = root.select_leaf()
    # ...and the returned leaf should not be the illegal move.
    self.assertNotEqual(picked.fmove, illegal_move)

    # Even after injecting noise, we should still not select an illegal move.
    for _ in range(10):
        root.inject_noise()
        picked = root.select_leaf()
        self.assertNotEqual(picked.fmove, illegal_move)
def test_backup_incorporate_results(self):
    """Incorporated values are backed up into N and Q along the whole path."""
    size = utils_test.BOARD_SIZE
    priors = np.array([.02] * (size * size + 1))
    root = MCTSNode(size, SEND_TWO_RETURN_ONE)
    root.select_leaf().incorporate_results(priors, 0, root)

    child = root.select_leaf()
    child.incorporate_results(priors, -1, root)  # white wins!

    # Root was visited twice: first at the root, then at this child.
    self.assertEqual(root.N, 2)
    # Root has 0 as a prior and two visits with value 0, -1.
    self.assertAlmostEqual(root.Q, -1 / 3)  # average of 0, 0, -1
    # The child should have one visit.
    self.assertEqual(root.child_N[child.fmove], 1)
    self.assertEqual(child.N, 1)
    # The child's value had its parent's Q (0) as a prior, so its Q should
    # now be the average of 0, -1.
    self.assertAlmostEqual(root.child_Q[child.fmove], -0.5)
    self.assertAlmostEqual(child.Q, -0.5)

    # We're assuming that select_leaf() returns a leaf like:
    #   root
    #     \
    #      child
    #        \
    #         grandchild
    # which happens in this test because root is W to play and the child
    # was a W win.
    self.assertEqual(root.position.to_play, go.WHITE)
    grandchild = root.select_leaf()
    grandchild.incorporate_results(priors, -0.2, root)  # another white semi-win
    self.assertEqual(root.N, 3)
    # average of 0, 0, -1, -0.2
    self.assertAlmostEqual(root.Q, -0.3)

    self.assertEqual(child.N, 2)
    self.assertEqual(grandchild.N, 1)
    # average of 0, -1, -0.2
    self.assertAlmostEqual(child.Q, root.child_Q[child.fmove])
    self.assertAlmostEqual(child.Q, -0.4)
    # average of -1, -0.2
    self.assertAlmostEqual(child.child_Q[grandchild.fmove], -0.6)
    self.assertAlmostEqual(grandchild.Q, -0.6)
def select_move(self, game_state):
    """Pick a move by running random rollouts over every legal move.

    One node is created per legal move. For `self.num_rounds` rounds,
    `self.select_child` picks a node, a random game (depth argument 50) is
    simulated for it, and the score is accumulated on that node. The move(s)
    with the highest average score among simulated nodes are collected and
    one of them is returned at random.

    Returns:
        The chosen move, or None when no move was ever simulated.
    """
    banner = '******************************************************************************************'
    candidates = game_state.legal_moves(self)

    if self.DEBUG:
        print(banner)
        print('MCTSAgent {0} thinking about move on world tick {1}.'.format(
            self.name, game_state.world_tick))
        print('...There are {0} moves that can be explored in {1} rounds.'.
              format(len(candidates), self.num_rounds))

    # One search node per move that can be explored.
    nodes = [MCTSNode(candidate) for candidate in candidates]

    for _ in range(self.num_rounds):
        chosen = self.select_child(nodes)
        rollout_score = self.simulate_random_game(game_state, chosen.move, 50)
        if self.DEBUG:
            print('......Explored move {0} which scored {1}.'.format(
                chosen.move, rollout_score))
        chosen.num_rollouts += 1
        chosen.total_score += rollout_score

    # Collect the best-scoring moves; ties are all kept so that one can be
    # chosen at random below.
    best_score = -99999999
    best_moves = []
    for node in nodes:
        if not node.num_rollouts > 0:
            # Only consider moves that were simulated at least once.
            continue
        average = node.avg_score()
        if not best_moves or average > best_score:
            best_moves, best_score = [node.move], average
        elif average == best_score:
            best_moves.append(node.move)

    if self.DUMP:
        print(banner)
        unique_rollouts = 0
        print('................. DUMP...................')
        print('MCTSAgent {0} thinking about move on world tick {1}.'.format(
            self.name, game_state.world_tick))
        for node in nodes:
            print('...Action {0} was explored {1} times with avg score of {2}'
                  .format(node.move, node.num_rollouts, node.avg_score()))
            if node.num_rollouts > 0:
                unique_rollouts += 1
        print('.........................................')
        print('')
        print('MCTSAgent {0} finished thinking about move on world tick {1}.'
              .format(self.name, game_state.world_tick))
        print(' -> Found {0} possible moves within {1} unique rollouts, having a score of {2}'
              .format(len(best_moves), unique_rollouts, best_score))
        if len(best_moves) == 1:
            print(' -> Best move was {0} for a score of {1}.'.format(
                best_moves[0], best_score))
        else:
            print(' -> Multiple best moves: {0}'.format(len(best_moves)))
        print(' -> Ships remaining: {0}'.format(
            game_state.world.get_ship_count()))
        print(banner)
        print('')

    if not best_moves:
        return None
    # For variety, randomly select among all equally good moves.
    return random.choice(best_moves)
# Training driver script; its statements are collapsed onto one line here.
# As written it builds an AtariGame and an NNetWrapper around it, roots an
# MCTSNode at the initial board, loads a checkpoint and wraps everything in a
# GameSession. It then calls execute_episode() over range(1500), prints
# gameSession.max_score, gathers play_game() output over range(50) into
# train_examples, trains the network on those examples and saves a checkpoint.
# NOTE(review): the original line breaks and indentation are lost, so it cannot
# be told from here whether the print sits inside the 1500-iteration loop;
# confirm against the original file before reformatting.
# NOTE(review): `deque` is imported but not used by the visible statements.
from collections import deque from atari.pytorch.NNet import NNetWrapper from atari.AtariGame import AtariGame from mcts import MCTSNode from GameSession import GameSession game_i = AtariGame() view = game_i.getInitBoard() nnet_checkpoint = NNetWrapper(game_i) mtsc_root = MCTSNode(view, action_size=game_i.getActionSize()) nnet_checkpoint.load_checkpoint() gameSession = GameSession(nnet_checkpoint, game_i, mtsc_root) for x in range(1500): gameSession.execute_episode() print(gameSession.max_score) train_examples = [] for y in range(50): train_examples += gameSession.play_game() #gameSession.play_game(render=True) nnet_checkpoint.train(train_examples) nnet_checkpoint.save_checkpoint()
class MCTSPlayerMixin:
    '''Mixin that plays Go by running a batched tree search from `self.root`.

    If `simulations_per_move` is nonzero, `suggest_move` performs that many
    reads before playing. Otherwise, it searches for `seconds_per_move`
    seconds of wall time.
    '''

    def __init__(self, network, seconds_per_move=5, simulations_per_move=0,
                 resign_threshold=-0.90, verbosity=0, two_player_mode=False,
                 num_parallel=8):
        '''Store the search settings and clear all per-game records.

        Args:
            network: Object providing `run_many(positions)`; its `save_file`
                is used to name the players in `to_sgf`.
            seconds_per_move: Wall-time budget used when
                `simulations_per_move` is 0.
            simulations_per_move: Number of reads per move (0 = use time).
            resign_threshold: Stored as `-abs(resign_threshold)`, so the
                sign of the argument does not matter.
            verbosity: Higher values print more to stderr/stdout.
            two_player_mode: When true, `temp_threshold` is -1 and
                `play_move` does not record `searches_pi`.
            num_parallel: Default number of leaves per `tree_search` batch.
        '''
        self.network = network
        self.seconds_per_move = seconds_per_move
        self.simulations_per_move = simulations_per_move
        self.verbosity = verbosity
        self.two_player_mode = two_player_mode
        if two_player_mode:
            self.temp_threshold = -1
        else:
            self.temp_threshold = TEMPERATURE_CUTOFF
        self.num_parallel = num_parallel
        self.qs = []
        self.comments = []
        self.searches_pi = []
        self.root = None
        self.result = 0
        self.result_string = None
        self.resign_threshold = -abs(resign_threshold)
        super().__init__()

    def initialize_game(self, position=None):
        '''Reset the per-game records and root the tree at `position`.

        A fresh `go.Position()` is used when `position` is None.
        '''
        if position is None:
            position = go.Position()
        self.root = MCTSNode(position)
        self.result = 0
        self.result_string = None
        self.comments = []
        self.searches_pi = []
        self.qs = []

    def suggest_move(self, position):
        '''Search from the current root and return the move to play.

        Used for playing a single game. For parallel play, use
        initialize_move, select_leaf, incorporate_results, and pick_move.
        `position` is only read for the verbose log line.
        '''
        start = time.time()

        if self.simulations_per_move == 0:
            while time.time() - start < self.seconds_per_move:
                self.tree_search()
        else:
            current_readouts = self.root.N
            while self.root.N < current_readouts + self.simulations_per_move:
                self.tree_search()
            if self.verbosity > 0:
                print("%d: Searched %d times in %s seconds\n\n" % (
                    position.n, self.simulations_per_move,
                    time.time() - start), file=sys.stderr)

        # print some stats on anything with probability > 1%
        if self.verbosity > 2:
            print(self.root.describe(), file=sys.stderr)
            print('\n\n', file=sys.stderr)
        if self.verbosity > 3:
            print(self.root.position, file=sys.stderr)

        return self.pick_move()

    def play_move(self, c):
        '''Play move `c` and make the resulting child the new root.

        Notable side effects:
          - finalizes the probability distribution according to this root's
            visit counts into the class' running tally, `searches_pi`
          - Makes the node associated with this move the root, for future
            `inject_noise` calls.

        Returns:
            True on success; False when the move raised `go.IllegalMove`, in
            which case the records appended for this move are removed again.
        '''
        if not self.two_player_mode:
            self.searches_pi.append(
                self.root.children_as_pi(
                    self.root.position.n < self.temp_threshold))
        self.qs.append(self.root.Q)  # Save our resulting Q.
        self.comments.append(self.root.describe())
        try:
            self.root = self.root.maybe_add_child(coords.to_flat(c))
        except go.IllegalMove:
            print("Illegal move")
            # Undo the bookkeeping done above for the rejected move.
            if not self.two_player_mode:
                self.searches_pi.pop()
            self.qs.pop()
            self.comments.pop()
            return False
        self.position = self.root.position  # for showboard
        # Drop the old root's children mapping (siblings of the new root).
        del self.root.parent.children
        return True  # GTP requires positive result.

    def pick_move(self):
        '''Picks a move to play, based on MCTS readout statistics.

        Highest N is most robust indicator. In the early stage of the game,
        pick a move weighted by visit count; later on, pick the absolute
        max.
        '''
        if self.root.position.n > self.temp_threshold:
            fcoord = np.argmax(self.root.child_N)
        else:
            # Sample proportionally to visit counts via the normalized CDF.
            cdf = self.root.child_N.cumsum()
            cdf /= cdf[-1]
            selection = random.random()
            fcoord = cdf.searchsorted(selection)
            assert self.root.child_N[fcoord] != 0
        return coords.from_flat(fcoord)

    def tree_search(self, num_parallel=None):
        '''Select up to `num_parallel` leaves, evaluate them in one batch,
        and back the results up to the root.

        `num_parallel` defaults to `self.num_parallel`.
        '''
        if num_parallel is None:
            num_parallel = self.num_parallel
        leaves = []
        # Finished-game leaves are backed up without joining the batch; the
        # failsafe bounds the loop when selection keeps returning them.
        failsafe = 0
        while len(leaves) < num_parallel and failsafe < num_parallel * 2:
            failsafe += 1
            leaf = self.root.select_leaf()
            if self.verbosity >= 4:
                print(self.show_path_to_root(leaf))
            # if game is over, override the value estimate with the true score
            if leaf.is_done():
                value = 1 if leaf.position.score() > 0 else -1
                leaf.backup_value(value, up_to=self.root)
                continue
            leaf.add_virtual_loss(up_to=self.root)
            leaves.append(leaf)
        if leaves:
            move_probs, values = self.network.run_many(
                [leaf.position for leaf in leaves])
            for leaf, move_prob, value in zip(leaves, move_probs, values):
                leaf.revert_virtual_loss(up_to=self.root)
                leaf.incorporate_results(move_prob, value, up_to=self.root)

    def show_path_to_root(self, node):
        '''Format the moves leading from the current root to `node`.

        Returns None when the node's position has no recent moves.
        '''
        pos = node.position
        diff = node.position.n - self.root.position.n
        if len(pos.recent) == 0:
            return

        def fmt(move):
            return "{}-{}".format('b' if move.color == 1 else 'w',
                                  coords.to_kgs(move.move))

        path = " ".join(fmt(move) for move in pos.recent[-diff:])
        if node.position.n >= MAX_DEPTH:
            path += " (depth cutoff reached) %0.1f" % node.position.score()
        elif node.position.is_game_over():
            path += " (game over) %0.1f" % node.position.score()
        return path

    def should_resign(self):
        '''Returns true if the player resigned. No further moves should be
        played.'''
        return self.root.Q_perspective < self.resign_threshold

    def set_result(self, winner, was_resign):
        '''Record the winner and the matching result string.'''
        self.result = winner
        if was_resign:
            string = "B+R" if winner == go.BLACK else "W+R"
        else:
            string = self.root.position.result_string()
        self.result_string = string

    def _sgf_comments(self, use_comments):
        '''Build the comment list for `to_sgf` without modifying
        `self.comments`.

        Works on a copy so that repeated `to_sgf` calls do not keep
        prepending the resign-threshold header to the stored first comment.
        '''
        if not use_comments:
            return []
        comments = list(self.comments) or ['No comments.']
        comments[0] = ("Resign Threshold: %0.3f\n" %
                       self.resign_threshold) + comments[0]
        return comments

    def to_sgf(self, use_comments=True):
        '''Return the game so far as SGF via `sgf_wrapper.make_sgf`.

        Requires `set_result` to have been called (asserts that
        `result_string` is set). Both players are named after the network's
        save file, or "Unknown" when its basename is empty.
        '''
        assert self.result_string is not None
        pos = self.root.position
        comments = self._sgf_comments(use_comments)
        return sgf_wrapper.make_sgf(
            pos.recent, self.result_string,
            white_name=os.path.basename(self.network.save_file) or "Unknown",
            black_name=os.path.basename(self.network.save_file) or "Unknown",
            comments=comments)

    def is_done(self):
        '''Return True once a result is set or the root reports it is done.'''
        return self.result != 0 or self.root.is_done()

    def extract_data(self):
        '''Yield `(position, pi, result)` for every move of the finished game.

        The asserts run when the generator is first advanced.
        '''
        assert len(self.searches_pi) == self.root.position.n
        assert self.result != 0
        for pwc, pi in zip(go.replay_position(self.root.position, self.result),
                           self.searches_pi):
            yield pwc.position, pi, pwc.result

    def chat(self, msg_type, sender, text):
        '''Answer a chat message; only `text` is inspected.'''
        default_response = "Supported commands are 'winrate', 'nextplay', 'fortune', and 'help'."
        if self.root is None or self.root.position.n == 0:
            return "I'm not playing right now.  " + default_response

        lowered = text.lower()
        if 'winrate' in lowered:
            wr = (abs(self.root.Q) + 1.0) / 2.0
            color = "Black" if self.root.Q > 0 else "White"
            return "{:s} {:.2f}%".format(color, wr * 100.0)
        elif 'nextplay' in lowered:
            return "I'm thinking... " + self.root.most_visited_path()
        elif 'fortune' in lowered:
            return "You're feeling lucky!"
        elif 'help' in lowered:
            return "I can't help much with go -- try ladders!  Otherwise: " + default_response
        else:
            return default_response
class MCTSPlayerMixin:
    '''Mixin that plays Go by running a batched tree search from `self.root`.

    If 'simulations_per_move' is nonzero, `suggest_move` performs that many
    reads before playing. Otherwise, it searches for 'seconds_per_move'
    seconds of wall time.
    '''

    def __init__(self, network, seconds_per_move=5, simulations_per_move=0,
                 resign_threshold=-0.90, verbosity=0, two_player_mode=False,
                 num_parallel=8):
        '''Store the search settings and clear all per-game records.

        Args:
            network: Object providing `run_many(positions)`; its `name` is
                used to name the players in `to_sgf`.
            seconds_per_move: Wall-time budget used when
                `simulations_per_move` is 0.
            simulations_per_move: Number of reads per move (0 = use time).
            resign_threshold: Stored as `-abs(resign_threshold)`, so the
                sign of the argument does not matter.
            verbosity: Higher values print more to stderr/stdout.
            two_player_mode: When true, `temp_threshold` is -1 and
                `play_move` does not record `searches_pi`.
            num_parallel: Default number of leaves per `tree_search` batch.
        '''
        self.network = network
        self.seconds_per_move = seconds_per_move
        self.simulations_per_move = simulations_per_move
        self.verbosity = verbosity
        self.two_player_mode = two_player_mode
        if two_player_mode:
            self.temp_threshold = -1
        else:
            self.temp_threshold = TEMPERATURE_CUTOFF
        self.num_parallel = num_parallel
        self.qs = []
        self.comments = []
        self.searches_pi = []
        self.root = None
        self.result = 0
        self.result_string = None
        self.resign_threshold = -abs(resign_threshold)
        super().__init__()

    def initialize_game(self, position=None):
        '''Reset the per-game records and root the tree at `position`.

        A fresh `go.Position()` is used when `position` is None.
        '''
        if position is None:
            position = go.Position()
        self.root = MCTSNode(position)
        self.result = 0
        self.result_string = None
        self.comments = []
        self.searches_pi = []
        self.qs = []

    def suggest_move(self, position):
        '''Search from the current root and return the move to play.

        Used for playing a single game. For parallel play, use
        initialize_move, select_leaf, incorporate_results, and pick_move.
        `position` is only read for the verbose log line.
        '''
        start = time.time()

        if self.simulations_per_move == 0:
            while time.time() - start < self.seconds_per_move:
                self.tree_search()
        else:
            current_readouts = self.root.N
            while self.root.N < current_readouts + self.simulations_per_move:
                self.tree_search()
            if self.verbosity > 0:
                print("%d: Searched %d times in %s seconds\n\n" % (
                    position.n, self.simulations_per_move,
                    time.time() - start), file=sys.stderr)

        # print some stats on anything with probability > 1%
        if self.verbosity > 2:
            print(self.root.describe(), file=sys.stderr)
            print('\n\n', file=sys.stderr)
        if self.verbosity > 3:
            print(self.root.position, file=sys.stderr)

        return self.pick_move()

    def play_move(self, c):
        '''Play move `c` and make the resulting child the new root.

        Notable side effects:
          - finalizes the probability distribution according to this root's
            visit counts into the class' running tally, `searches_pi`
          - Makes the node associated with this move the root, for future
            `inject_noise` calls.
        '''
        if not self.two_player_mode:
            self.searches_pi.append(
                self.root.children_as_pi(
                    self.root.position.n < self.temp_threshold))
        self.qs.append(self.root.Q)  # Save our resulting Q.
        self.comments.append(self.root.describe())
        self.root = self.root.maybe_add_child(coords.to_flat(c))
        self.position = self.root.position  # for showboard
        # Drop the old root's children mapping (siblings of the new root).
        del self.root.parent.children
        return True  # GTP requires positive result.

    def pick_move(self):
        '''Picks a move to play, based on MCTS readout statistics.

        Highest N is most robust indicator. In the early stage of the game,
        pick a move weighted by visit count; later on, pick the absolute
        max.
        '''
        if self.root.position.n > self.temp_threshold:
            fcoord = np.argmax(self.root.child_N)
        else:
            # Sample proportionally to visit counts via the normalized CDF.
            cdf = self.root.child_N.cumsum()
            cdf /= cdf[-1]
            selection = random.random()
            fcoord = cdf.searchsorted(selection)
            assert self.root.child_N[fcoord] != 0
        return coords.from_flat(fcoord)

    def tree_search(self, num_parallel=None):
        '''Select up to `num_parallel` leaves, evaluate them in one batch,
        and back the results up to the root.

        `num_parallel` defaults to `self.num_parallel`.
        '''
        if num_parallel is None:
            num_parallel = self.num_parallel
        leaves = []
        # Finished-game leaves are backed up without joining the batch; the
        # failsafe bounds the loop when selection keeps returning them.
        failsafe = 0
        while len(leaves) < num_parallel and failsafe < num_parallel * 2:
            failsafe += 1
            leaf = self.root.select_leaf()
            if self.verbosity >= 4:
                print(self.show_path_to_root(leaf))
            # if game is over, override the value estimate with the true score
            if leaf.is_done():
                value = 1 if leaf.position.score() > 0 else -1
                leaf.backup_value(value, up_to=self.root)
                continue
            leaf.add_virtual_loss(up_to=self.root)
            leaves.append(leaf)
        if leaves:
            move_probs, values = self.network.run_many(
                [leaf.position for leaf in leaves])
            for leaf, move_prob, value in zip(leaves, move_probs, values):
                leaf.revert_virtual_loss(up_to=self.root)
                leaf.incorporate_results(move_prob, value, up_to=self.root)

    def show_path_to_root(self, node):
        '''Format the moves leading from the current root to `node`.

        Returns None when the node's position has no recent moves.
        '''
        pos = node.position
        diff = node.position.n - self.root.position.n
        if len(pos.recent) == 0:
            return

        def fmt(move):
            return "{}-{}".format('b' if move.color == 1 else 'w',
                                  coords.to_kgs(move.move))

        path = " ".join(fmt(move) for move in pos.recent[-diff:])
        if node.position.n >= MAX_DEPTH:
            path += " (depth cutoff reached) %0.1f" % node.position.score()
        elif node.position.is_game_over():
            path += " (game over) %0.1f" % node.position.score()
        return path

    def should_resign(self):
        '''Returns true if the player resigned. No further moves should be
        played.'''
        return self.root.Q_perspective < self.resign_threshold

    def set_result(self, winner, was_resign):
        '''Record the winner and the matching result string.'''
        self.result = winner
        if was_resign:
            string = "B+R" if winner == go.BLACK else "W+R"
        else:
            string = self.root.position.result_string()
        self.result_string = string

    def _sgf_comments(self, use_comments):
        '''Build the comment list for `to_sgf` without modifying
        `self.comments`.

        Works on a copy so that repeated `to_sgf` calls do not keep
        prepending the resign-threshold header to the stored first comment.
        '''
        if not use_comments:
            return []
        comments = list(self.comments) or ['No comments.']
        comments[0] = ("Resign Threshold: %0.3f\n" %
                       self.resign_threshold) + comments[0]
        return comments

    def to_sgf(self, use_comments=True):
        '''Return the game so far as SGF via `sgf_wrapper.make_sgf`.

        Requires `set_result` to have been called (asserts that
        `result_string` is set). Both players are named after
        `self.network.name`, or "Unknown" when that is falsy.
        '''
        assert self.result_string is not None
        pos = self.root.position
        comments = self._sgf_comments(use_comments)
        return sgf_wrapper.make_sgf(
            pos.recent, self.result_string,
            white_name=self.network.name or "Unknown",
            black_name=self.network.name or "Unknown",
            comments=comments)

    def extract_data(self):
        '''Yield `(position, pi, result)` for every move of the finished game.

        The asserts run when the generator is first advanced.
        '''
        assert len(self.searches_pi) == self.root.position.n
        assert self.result != 0
        for pwc, pi in zip(go.replay_position(self.root.position, self.result),
                           self.searches_pi):
            yield pwc.position, pi, pwc.result

    def chat(self, msg_type, sender, text):
        '''Answer a chat message; only `text` is inspected.'''
        default_response = "Supported commands are 'winrate', 'nextplay', 'fortune', and 'help'."
        if self.root is None or self.root.position.n == 0:
            return "I'm not playing right now.  " + default_response

        lowered = text.lower()
        if 'winrate' in lowered:
            wr = (abs(self.root.Q) + 1.0) / 2.0
            color = "Black" if self.root.Q > 0 else "White"
            return "{:s} {:.2f}%".format(color, wr * 100.0)
        elif 'nextplay' in lowered:
            return "I'm thinking... " + self.root.most_visited_path()
        elif 'fortune' in lowered:
            return "You're feeling lucky!"
        elif 'help' in lowered:
            return "I can't help much with go -- try ladders!  Otherwise: " + default_response
        else:
            return default_response
#!/usr/bin/env python3.7
# Timing script: load the 'connectFour' propnet, run 400 MCTS simulations
# from a fresh root, print the root node and report the elapsed time.
import sys
# The project directory must be on the path before the imports below.
sys.path.insert(1, '/Users/Cameron/Desktop/transfer_ggp')
from model import Model
from mcts import MCTSNode, simulation
from propnet.propnet import load_propnet
import time

started_at = time.time()

# Other games previously tried here: 'connect4match1', 'tictactoe1'.
data, propnet = load_propnet('connectFour')
root = MCTSNode(propnet, data)

for _ in range(400):
    simulation(root)
root.print_node()

print('Took', time.time() - started_at, 'seconds')
def set_mcts(self, state):
    """Point `self.head` at a new MCTS node for `state`.

    The node is built with the `evaluator` found in the enclosing scope.
    """
    head_node = MCTSNode(state, evaluator=evaluator)
    self.head = head_node
def test_add_child(self):
    """A newly added child is registered on the root and linked back to it."""
    root = MCTSNode(go.Position())
    added = root.maybe_add_child(17)
    self.assertIn(17, root.children)
    self.assertEqual(added.parent, root)
    self.assertEqual(added.fmove, 17)
def _root():
    """Return an MCTS node wrapping a TicTacToeState with nine empty cells."""
    empty_cells = [None] * 9
    return MCTSNode(TicTacToeState(empty_cells, True, None, None))