def to_leaf(self, c_puct: float, position: C4Game) -> 'MCTSNode':
    """
    Traverses the tree from current node to a leaf node

    Starting at this node, the search repeatedly steps into the child
    whose ``value(c_puct)`` is highest (the first such child on ties)
    until it reaches a node that has no children.  The move stored on
    every visited node, this one included, is played on ``position``
    along the way.

    The iterative and the recursive implementation below perform the same
    walk; ``USE_ITERATIVE_FOR_GREEDY_TRAVERSAL`` selects which one runs.

    Parameters
    ----------
    c_puct: `float`
        Constant controlling exploration
    position: `C4Game`
        The position of the current game state, which will be
        automatically updated as the tree is traversed to a leaf node.

    Returns
    -------
    leaf_node: `MCTSNode`
        The leaf node found after tree traversal
    """
    if USE_ITERATIVE_FOR_GREEDY_TRAVERSAL:
        curr = self
        # The loop can only be left through the return below, so nothing
        # may follow it inside this branch.
        while True:
            if curr.move is not None:
                position.play_move(curr.move)
            if not curr.children:
                return curr
            child_scores = [c.value(c_puct) for c in curr.children]
            curr = curr.children[child_scores.index(max(child_scores))]

    # BELOW: OLD RECURSIVE ALGORITHM
    if self.move is not None:
        # a node whose move is None has nothing to replay
        position.play_move(self.move)
    if not self.children:
        return self
    # more performant than np.argmax by a lot
    # select the best child
    child_scores = [c.value(c_puct) for c in self.children]
    best_child = self.children[child_scores.index(max(child_scores))]
    return best_child.to_leaf(c_puct, position)
def to_leaf(self, c_puct: float, position: C4Game) -> 'MCTSNode':
    """
    Traverses the tree from current node to a leaf node

    Every node visited on the way down, this one included, gets its
    ``VL`` counter incremented and, when it stores a move, has that move
    played on ``position``.

    Two implementations are kept, selected by
    ``USE_ITERATIVE_FOR_GREEDY_TRAVERSAL``:

    * iterative - follows the child with the highest ``value(c_puct)``
      and stops at the first node that has no children or whose
      ``terminal`` flag is set;
    * recursive - follows the child with the highest ``value(c_puct)``
      and stops at the first node that has no children; when
      ``DO_SEARCH_TREE_PRUNING`` is set it may additionally flag the
      current node as terminal (and return it) or as pruned.

    NOTE(review): the two paths are not equivalent - only the iterative
    one stops on ``terminal`` and only the recursive one applies pruning.
    Confirm that this difference is intentional.

    Parameters
    ----------
    c_puct: `float`
        Constant controlling exploration
    position: `C4Game`
        The position of the current game state, which will be
        automatically updated as the tree is traversed to a leaf node.

    Returns
    -------
    leaf_node: `MCTSNode`
        The leaf node found after tree traversal
    """
    if USE_ITERATIVE_FOR_GREEDY_TRAVERSAL:
        curr = self
        # The loop can only be left through the return below, so nothing
        # may follow it inside this branch.
        while True:
            curr.VL += 1
            if curr.move is not None:
                position.play_move(curr.move)
            if not curr.children or curr.terminal:
                return curr
            child_scores = [c.value(c_puct) for c in curr.children]
            curr = curr.children[child_scores.index(max(child_scores))]

    # BELOW: RECURSIVE ALGORITHM
    self.VL += 1
    if self.move is not None:
        position.play_move(self.move)
    if not self.children:
        return self
    # more performant than np.argmax by a lot
    # select the best child (first one wins on ties because of the strict >)
    max_child_score = float('-inf')
    max_child_index = 0
    for i, c in enumerate(self.children):
        v = c.value(c_puct)
        if v > max_child_score:
            max_child_score = v
            max_child_index = i
    if DO_SEARCH_TREE_PRUNING:
        if max_child_score < -1:
            # all losing, this move is won
            self.terminal = True
            self.terminal_score = 1
            return self
        elif max_child_score == float('inf'):
            # this move is lost
            self.prune = True  # this node will never be selected again
    return (self.children[max_child_index].to_leaf(c_puct, position))
def do_selfplay(num: int, playouts: int, c_puct: float, mdl: Model,
                dir_alpha: float, temp_cutoff: int,
                mcts_batch_size: int) -> tuple:
    """
    Generate self-play games one after another

    This is a generator: each finished game is yielded as soon as it is
    over.  After every move the subtree below the move that was played is
    handed to a freshly built searcher so that it can be reused.

    Parameters
    ----------
    num: `int`
        The number of selfplay games to make
    playouts: `int`
        The amount of playouts in MCTS
    c_puct: `float`
        PUCT for MCTS
    mdl: `tensorflow.keras.models.Model`
        Model used for predictions
    dir_alpha: `float`
        Dirichlet noise alpha value
    temp_cutoff: `int`
        Moves are picked with temperature 1 while fewer than this many
        moves are in the game's move history, and with 1e-3 afterwards
    mcts_batch_size: `int`
        Passed to the searcher as ``batch_size``

    Yields
    ------
    `tuple`
        ``(state_logs, result, move_logs, move_search_logs)`` where
        ``result`` is what ``game.check_terminal()`` reports for the
        finished game and the three lists hold one entry per move played
    """
    def new_searcher(board):
        # every searcher in this function is built with the same settings
        return MCTS(board, True, mdl, c_puct, playouts,
                    dir_alpha=dir_alpha, batch_size=mcts_batch_size)

    for index in range(num):
        print(f'Starting self-play game {index + 1}/{num}')
        game = C4Game()
        searcher = new_searcher(game)
        states, played, search_results = [], [], []

        while game.check_terminal() is None:
            search_results.append(np.array(searcher.playout_to_max()))

            # temperature decay
            temperature = 1 if len(game.move_history) < temp_cutoff else 1e-3
            move = searcher.pick_move(temp=temperature)

            states.append(game.state)
            played.append(move)
            game.play_move(move)

            # tree reuse: the child reached by the chosen move becomes the
            # root of the next searcher, detached from its old parent
            successor = new_searcher(game)
            kept = next((child for child in searcher.top_node.children
                         if child.move == move), None)
            if kept is not None:
                kept.move = None
                kept.parent = None
                kept.P = None
                successor.top_node = kept
            searcher = successor

        yield states, game.check_terminal(), played, search_results
def fast_selfplay(playouts: int, c_puct: float, dir_alpha: float,
                  temp_cutoff: int, force_seed: int = None):
    """
    Play one self-play game with the standalone C4UCT executable

    The executable is started as a child process, configured through its
    standard input and then asked to play a game.  Its output is read line
    by line and replayed on a local `C4Game` so that the board states can
    be logged.  The child process is always killed and its pipes closed
    before this function returns or raises.

    Parameters
    ----------
    playouts: `int`
        Sent to the engine as ``playouts set``
    c_puct: `float`
        Sent to the engine as ``c_puct set``
    dir_alpha: `float`
        Sent to the engine as ``dir_alpha set``
    temp_cutoff: `int`
        Sent to the engine as ``temp_cutoff set``
    force_seed: `int`, optional
        Seed sent to the engine; a random one is drawn when omitted

    Returns
    -------
    `tuple`
        ``(state_logs, result, move_logs, move_search_logs)`` where
        ``result`` is what ``game.check_terminal()`` reports after the
        last move and the three lists hold one entry per move played

    Raises
    ------
    RuntimeError
        If the engine's output ends before it acknowledged the seed or
        before it reported ``done``
    """
    if force_seed is None:
        force_seed = random.randint(1, 4294967295)

    state_logs = []
    move_logs = []
    move_search_logs = []
    game = C4Game()

    # spawn process; the context manager closes the pipes and reaps the
    # child on the way out, also when an exception is raised
    # NOTE(review): stderr is piped but never read; a very chatty engine
    # could block once that pipe fills up.
    with Popen('./standalone/Release_x64/C4UCT.exe',
               cwd='./standalone/Release_x64',
               universal_newlines=True,
               stdin=PIPE, stdout=PIPE, stderr=PIPE) as sub:
        try:
            sub.stdin.write(f'ssp\nseed {force_seed}\nc_puct set {c_puct}\n'
                            f'dir_alpha set {dir_alpha}\n'
                            f'temp_cutoff set {temp_cutoff}\n'
                            f'playouts set {playouts}\nsspgo\n')
            sub.stdin.flush()

            # ignore some lines we don't want
            while True:
                raw = sub.stdout.readline()
                if not raw:
                    # readline() returns '' only at end of file; without
                    # this check the loop would spin forever
                    raise RuntimeError(
                        'C4UCT exited before acknowledging the seed')
                if raw.strip().startswith('seed set to '):
                    break

            while True:
                raw = sub.stdout.readline()
                if not raw:
                    raise RuntimeError(
                        "C4UCT exited before reporting 'done'")
                info = raw.strip()
                if info == 'done':
                    break
                # the last character is the move; the final three
                # characters are dropped and the rest is parsed as
                # space-separated floats
                move = int(info[-1])
                probs = [float(p) for p in info[:-3].split(' ')]
                move_search_logs.append(np.array(probs))
                state_logs.append(game.state)
                move_logs.append(move)
                game.play_move(move)
        finally:
            sub.kill()

    return state_logs, game.check_terminal(), move_logs, move_search_logs
def vs_ai(mdl: Model, go_first: bool = True) -> None:
    """
    Allows a human player to play against the AI

    The board is printed after every move.  The human is asked for a move
    until one is accepted; the AI searches with a fresh `MCTS` instance on
    each of its turns and resigns when its principal variation looks lost.

    Parameters
    ----------
    mdl: `keras.models.Model`
        The neural network to use
    go_first: `bool`
        True if the player wishes to go first, else False

    Raises
    ------
    EOFError
        If standard input is closed while a move is being requested
    """
    game = C4Game()
    moves = 0
    print('Starting the game!')
    print(game)
    while game.check_terminal() is None:
        if moves % 2 != int(go_first):
            # human
            while True:
                # input() stays outside the try block: on a closed stdin it
                # raises EOFError on every call, and swallowing that would
                # turn this retry loop into an endless busy loop
                raw = input('Move (0 to 6): ')
                try:
                    game.play_move(int(raw))
                    break
                except Exception as err:
                    # covers non-numeric input and whatever play_move
                    # raises for a move it does not accept; ask again
                    print(f'Invalid move: {err}')
        else:
            # ai
            searcher = MCTS(game, False, mdl, 3, 30, 10)
            searcher.search_for_time(10)  # presumably seconds - TODO confirm
            print(searcher.top_node.N)
            move = searcher.pick_move()
            game.play_move(move)
            # pv
            pv = searcher.get_pv()
            print(f'Expected win prob: {round((pv[0].Q / 2 + 0.5) * 100, 2)}%')
            # resign once the first PV node scores below -0.95 and more
            # than 30 moves have been played
            if pv[0].Q < -0.95 and len(game.move_history) > 30:
                print(game, '\nI resign!')
                break
        print(game)
        moves += 1
    print('Game over!')
def expand(self, priors: np.ndarray, position: C4Game) -> None:
    """
    Adds children to the current node

    One child is appended to ``self.children`` for every move index that
    ``position.legal_moves()`` marks as allowed.  Each such move is
    played on ``position`` to look up the terminal status of the
    resulting state and is undone again afterwards, so ``position`` ends
    up in the state it was passed in.

    Parameters
    ----------
    priors:
        A vector of the NN's prior probabilities for each child in order
    position:
        The game state required to reach this node
    """
    legal = position.legal_moves()
    for column, prior in enumerate(priors):
        if not legal[column]:
            # move not available in this position
            continue
        position.play_move(column)
        outcome = position.check_terminal()
        # a child that does not end the game is created with 0 in place
        # of the outcome
        child = MCTSNode(self, column, prior, outcome is not None,
                         0 if outcome is None else outcome)
        self.children.append(child)
        position.undo_move()
def _board_from_position_string(pstr):
    """
    Convert a ``position set`` board string into a board matrix

    ``X``, ``O`` (either case) and ``/`` are kept, every digit expands to
    that many empty cells and all other characters are ignored.  The
    ``/``-separated rows must form a 6 x 7 grid, which is rotated into the
    7 x 6 matrix the game object stores (-1 for ``X``, 1 for ``O``,
    0 for empty).

    Raises
    ------
    ValueError
        If the string does not describe a 6 x 7 grid
    """
    cells = []
    for c in pstr:
        if c.upper() in 'XO/':
            cells.append(c.upper())
        if c.isdigit():
            cells.append(' ' * int(c))
    rows = ''.join(cells).split('/')
    if len(rows) != 6 or any(len(row) != 7 for row in rows):
        # checked explicitly: numpy would otherwise either fail with an
        # obscure message or silently broadcast a too-small grid
        raise ValueError(f'invalid position string: {pstr!r}')
    rpos = np.array([list(row) for row in rows])  # rotated position
    pos90 = np.rot90(rpos, k=3)  # rotated correctly
    return np.zeros((7, 6)) - (pos90 == 'X') + (pos90 == 'O')


def main():
    """
    Run the console command loop of the engine

    Commands are read from standard input, one per line:

    * ``go`` / ``go n <count>`` / ``go t <time>`` - start a search thread
    * ``stop`` - stop the running search
    * ``isready`` - answer ``readyok``
    * ``d`` - print the current position
    * ``mv <move>`` - play a move
    * ``undo`` - take back the last move
    * ``static`` - print the network's value and policy for the position
    * ``position startpos [moves <m1> <m2> ...]`` - reset the position and
      optionally replay moves
    * ``position set <board> <X|O>`` - set up an arbitrary position
    * ``image [file]`` - print the state planes and optionally write them
      to an image file
    * ``exit`` / ``quit`` - leave the program

    Commands that change the position are ignored while a search is
    running.  A closed standard input is handled like ``quit``.
    """
    global SEARCH_THREAD
    global POSITION
    SEARCH_THREAD = None
    # prepare the model
    MODEL._make_predict_function()
    tf.get_default_graph().finalize()
    searching = False
    while True:
        try:
            inp = input()
        except EOFError:
            # stdin was closed: leave through the regular quit path
            # instead of crashing with a traceback
            inp = 'quit'
        if SEARCH_THREAD is None or SEARCH_THREAD.stopped():
            searching = False
        # check
        # NOTE(review): 'go' is accepted while a search is already running
        # and then starts a second thread - confirm this is intended.
        if inp.startswith('go'):
            match_n = re.match(r'^go n ?=? ?(\d+)', inp)  # match node
            match_t = re.match(r'^go t ?=? ?(\d+)', inp)  # match time
            if match_n:
                SEARCH_THREAD = SearchThread(nodes=int(match_n.group(1)))
            elif match_t:
                SEARCH_THREAD = SearchThread(stime=int(match_t.group(1)))
            else:
                SEARCH_THREAD = SearchThread()
            SEARCH_THREAD.start()
            searching = True
        if inp == 'd':
            print(POSITION)
        if inp == 'stop' and searching:
            searching = False
            SEARCH_THREAD.stop()
        if inp == 'isready':
            print('readyok')
        if inp.startswith('mv') and not searching:
            try:
                move = int(inp.split(' ')[1])
                POSITION.play_move(move)
            except Exception as e:
                print(e)
                continue
        if inp == 'undo' and not searching:
            try:
                POSITION.undo_move()
            except Exception as e:
                # reported like a failed 'mv' instead of being dropped
                print(e)
                continue
        if inp == 'static' and not searching:
            # show static evaluation of policy net
            value, policy = MODEL.predict(np.expand_dims(POSITION.state, 0))
            print(f'V={value[0][0]}')
            print('\n'.join(f'MV={i} P={round(100 * p, 2)}%'
                            for i, p in enumerate(policy[0])))
        if inp.startswith('position') and not searching:
            inp = inp.split(' ')
            if len(inp) < 2:
                continue
            if inp[1] == 'startpos':
                POSITION = C4Game()
                if len(inp) > 3 and inp[2] == 'moves':
                    for m in inp[3:]:
                        try:
                            POSITION.play_move(int(m))
                        except Exception as e:
                            print(e)
                            POSITION = C4Game()
                            # the remaining moves belong to a line that can
                            # no longer be reached, so stop replaying
                            break
            if len(inp) > 3 and inp[1] == 'set':
                try:
                    mat = _board_from_position_string(inp[2])
                except ValueError as e:
                    # keep the current position when the string is malformed
                    print(e)
                    continue
                POSITION = C4Game()
                POSITION.position = mat
                POSITION.to_move = -1 if inp[3].upper() == 'X' else 1
                POSITION.position_history = [mat.copy()]
            # the checks below expect the command as a string again
            inp = ' '.join(inp)
        if inp.startswith('image'):
            print(np.moveaxis(POSITION.state, 2, 0))
            if len(inp.split(' ')) == 2:
                fout = inp.split(' ')[1]  # file out name
                try:
                    cv2.imwrite(fout, POSITION.state * 255)
                except Exception as e:
                    print(e)
        if inp == 'exit' or inp == 'quit':
            print('Goodbye ~ uwu')
            # same effect as sys.exit(), without reaching for sys through
            # the undocumented os.sys attribute
            raise SystemExit
import cv2 import numpy as np import tensorflow as tf from keras.models import Model, load_model from c4game import C4Game # from mcts import MCTS from mcts_v2 import MCTS os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # network MODEL_FILE = './testXVI/save_2071.ntwk' MODEL = load_model(MODEL_FILE) POSITION = C4Game() class SearchThread(threading.Thread): def __init__(self, *, stime: int = None, nodes: int = 5000): super(SearchThread, self).__init__() self._stopping = threading.Event() self._target = search self._args = (POSITION, MODEL) if stime is not None: self._kwargs = {'stime': stime} else: self._kwargs = {'nodes': nodes} def stop(self):
Alternate version of play_vs_ai.py but with PONDERING PONDERING is when the engine thinks in the opponent's time """ import sys import threading import tensorflow as tf from keras.models import Model, load_model from c4game import C4Game from mcts_v2 import MCTS # network MODEL_FILE = './testXVI/save_2071.ntwk' MODEL = load_model(MODEL_FILE) POSITION = C4Game() ENG_POSITION = C4Game() MODEL._make_predict_function() tf.get_default_graph().finalize() class SearchThread(threading.Thread): def __init__(self): super(SearchThread, self).__init__() self._stopping = threading.Event() self._finished = threading.Event() self._target = search self._args = (ENG_POSITION, MODEL, ENGINE) self._kwargs = {}