def play(self):
    """Play a game of Human vs. AI."""
    print("Start Human vs AI\n")
    mcts = MonteCarloTreeSearch(self.net)
    game = self.game.clone()  # Create a fresh clone for each game.
    game_over = False
    value = 0
    node = TreeNode()

    print("Enter your move in the form: row, column. Eg: 1,1")
    go_first = input("Do you want to go first: y/n? ")
    if go_first.lower().strip() == 'y':
        print("You play as X")
        human_value = 1
        game.print_board()
    else:
        print("You play as O")
        human_value = -1

    # Keep playing until the game is in a terminal state.
    while not game_over:
        if game.current_player == human_value:
            # Human turn: parse the "row,column" input into an action tuple.
            action = input("Enter your move: ")
            if isinstance(action, str):
                action = [int(n, 10) for n in action.split(",")]
                action = (1, action[0], action[1])
            best_child = TreeNode()
            best_child.action = action
        else:
            # AI turn: MCTS simulations to get the best child node.
            best_child = mcts.search(game, node, CFG.temp_final)

        action = best_child.action
        game.play_action(action)  # Play the chosen action.
        game.print_board()

        game_over, value = game.check_game_over(game.current_player)

        best_child.parent = None
        node = best_child  # Make the child node the root node.

    if value == human_value * game.current_player:
        print("You won!")
    elif value == -human_value * game.current_player:
        print("You lost.")
    else:
        print("Draw Match")
    print("\n")
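# Illustration (not part of the original code): a minimal sketch of how the
# human's "row,column" string above is turned into the action tuple passed to
# game.play_action. The leading 1 simply mirrors the constant used in play();
# its exact meaning depends on the game implementation.
move = "1,2"
row, col = [int(n, 10) for n in move.split(",")]
action = (1, row, col)
print(action)  # (1, 1, 2)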
def play_against_network(evaluator, opponent_evaluator, color, conf):
    # evaluators[0] for black player, evaluators[1] for white player
    evaluators = [evaluator, opponent_evaluator]
    if color == WHITE:
        evaluators[0], evaluators[1] = evaluators[1], evaluators[0]

    # create search trees for both players
    roots = [None, None]
    for i in range(2):
        roots[i] = TreeNode(None, None, evaluators[i], conf)

    # black player goes first (0 for black, 1 for white)
    player = 0
    previous_action = None
    t = 0
    while t < conf.MAX_GAME_LENGTH:
        # perform MCTS
        for _ in range(conf.NUM_SIMULATIONS):
            tree_search(roots[player], evaluators[player], conf)

        # calculate the distribution of action selection
        # temperature tau -> 0
        m = max(roots[player].n)
        p = [0 if x < m else 1 for x in roots[player].n]
        s = sum(p)
        pi = np.array([x / s for x in p], dtype=np.float32)

        # choose an action
        action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action
        for i in range(2):
            if roots[i].children[action] is None:
                roots[i].children[action] = \
                    TreeNode(roots[i], action, evaluators[i], conf)
            roots[i] = roots[i].children[action]
            # release memory
            roots[i].parent.children = None
        t += 1

        # switch to the other player
        player = 1 - player

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            break
        previous_action = action

    score_black, score_white = roots[0].go.score()
    return (score_black > score_white) == (color == BLACK)
def play(self):
    datas, node = [], TreeNode()
    mc = MonteCarloTreeSearch(self.net)
    move_count = 0
    while True:
        if move_count < TEMPTRIG:
            pi, next_node = mc.search(self.board, node, temperature=1)
        else:
            pi, next_node = mc.search(self.board, node)
        datas.append([self.board.gen_state(), pi, self.board.c_player])
        self.board.move(next_node.action)

        next_node.parent = None
        node = next_node

        if self.board.is_draw():
            reward = 0.
            break
        if self.board.is_game_over():
            reward = 1.
            break

        self.board.trigger()
        move_count += 1

    datas = np.asarray(datas)
    # Pre-compute the mask so the first assignment cannot corrupt the second
    # one if the reward value happens to collide with a player id.
    winner_mask = datas[:, 2] == self.board.c_player
    datas[:, 2][winner_mask] = reward
    datas[:, 2][~winner_mask] = -reward
    return datas
def play_game(self, game, training_data):
    """Loop for each self-play game.

    Runs MCTS for each game state and plays a move based on the MCTS output.
    Stops when the game is over and prints out a winner.

    Args:
        game: An object containing the game state.
        training_data: A list to store self play states, pis and vs.
    """
    mcts = MonteCarloTreeSearch(self.net)
    game_over = False
    value = 0
    self_play_data = []
    count = 0
    node = TreeNode()

    # Keep playing until the game is in a terminal state.
    while not game_over:
        # MCTS simulations to get the best child node.
        if count < CFG.temp_thresh:
            best_child, prob_vector = mcts.search(game, node, CFG.temp_init)
        else:
            best_child, prob_vector = mcts.search(game, node, CFG.temp_final)

        if best_child is not None:
            # Store state, prob and v for training.
            self_play_data.append(
                [deepcopy(game.state), deepcopy(prob_vector), 0])

            action = best_child.action
            game.play_action(action)  # Play the child node's action.
            count += 1

            game_over, value = game.check_game_over(game.current_player)

            best_child.parent = None
            node = best_child  # Make the child node the root node.
        else:
            # No action available from MCTS: record the state and pass the
            # turn to the other player.
            self_play_data.append(
                [deepcopy(game.state), deepcopy(prob_vector), 0])
            game.current_player *= -1

    # Update v as the value of the game result.
    print('FINAL SCORES ARE ', game.score)
    for game_state in self_play_data:
        value = -value
        game_state[2] = value
        self.augment_data(game_state, training_data, game.row, game.column)
def evaluate(self):
    """Play self-play games between the two networks and record game stats.

    Returns:
        Wins and losses count from the perspective of the current network.
    """
    wins = 0
    losses = 0

    # Self-play loop
    for i in range(self.num_eval_games):
        print("Start Evaluation Self-Play Game:", i, "\n")
        game = self.game.clone()  # Create a fresh clone for each game.
        game_over = False
        value = 0
        node = TreeNode()
        # player = game.current_player

        # Keep playing until the game is in a terminal state.
        while not game_over:
            # MCTS simulations to get the best child node.
            # If player_to_eval is 1 play using the current network,
            # else play using the evaluation network.
            # if game.current_player == 1:
            best_child = self.current_mcts.search(game, node,
                                                  self.temp_final)
            # else:
            #     best_child = self.eval_mcts.search(game, node,
            #                                        self.temp_final)

            action = best_child.action
            game.play_action(action)  # Play the child node's action.

            game_over, value = game.check_game_over()

            best_child.parent = None
            node = best_child  # Make the child node the root node.

        game.print_board()
        final_score = game.evaluate()
        print('Score : ', final_score, ' (% of best score possible : ',
              np.round(final_score * 100 / game.maxScore, 2), '%)')

        if value == 1:
            print("win")
            wins += 1
        elif value == -1:
            print("loss")
            losses += 1
        else:
            print("draw")
        print("\n")

    return wins, losses
def go(self):
    print("One rule:\r\n Enter a move in the form 'x,y' \r\n e.g. 1,3\r\n")
    print("-" * 60)
    print("Ready Go")

    mc = MonteCarloTreeSearch(self.net, 1000)
    node = TreeNode()
    board = Board()
    while True:
        if board.c_player == BLACK:
            # Human plays black: parse "x,y" into a flat board index.
            action = input("Your piece is 'O'. Enter your move: ")
            action = [int(n, 10) for n in action.split(",")]
            action = action[0] * board.size + action[1]
            next_node = TreeNode(action=action)
        else:
            _, next_node = mc.search(board, node)

        board.move(next_node.action)
        board.show()

        next_node.parent = None
        node = next_node

        if board.is_draw():
            print("-" * 28 + "Draw" + "-" * 28)
            return

        if board.is_game_over():
            if board.c_player == BLACK:
                print("-" * 28 + "Win" + "-" * 28)
            else:
                print("-" * 28 + "Loss" + "-" * 28)
            return

        board.trigger()
def play_game(self, game, training_data):
    """Loop for each self-play game.

    Runs MCTS for each game state and plays a move based on the MCTS output.
    Stops when the game is over and prints out a winner.

    Args:
        game: An object containing the game state.
        training_data: A list to store self play states, pis and vs.
    """
    mcts = MonteCarloTreeSearch(self.net)
    game_over = False
    value = 0
    self_play_data = []
    count = 0
    node = TreeNode()

    # Keep playing until the game is in a terminal state.
    while not game_over:
        # MCTS simulations to get the best child node.
        if count < self.temp_thresh:
            best_child = mcts.search(game, node, self.temp_init)
        else:
            best_child = mcts.search(game, node, self.temp_final)

        # Store state, prob and v for training.
        self_play_data.append([
            deepcopy(game.state['state']),
            deepcopy(best_child.parent.child_psas),
            0
        ])

        action = best_child.action
        game.play_action(action)  # Play the child node's action.
        count += 1

        # TODO: TO BE COMPLETED !!
        game_over, value = game.check_game_over()

        best_child.parent = None
        node = best_child  # Make the child node the root node.

    # Update v as the value of the game result.
    for game_state in self_play_data:
        value = -value
        game_state[2] = value
        self.augment_data(game_state, training_data, game.row, game.column)
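# Illustration (not part of the original code): the loop that closes the
# play_game functions above writes the terminal result into every stored
# position with alternating signs, flipping the perspective between
# consecutive moves. A minimal standalone sketch with a made-up result:
value = 1  # terminal result returned by check_game_over
self_play_data = [['s0', 'pi0', 0], ['s1', 'pi1', 0], ['s2', 'pi2', 0]]
for game_state in self_play_data:
    value = -value
    game_state[2] = value
print([row[2] for row in self_play_data])  # [-1, 1, -1]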
def evaluate(self, result):
    self.net.eval()
    self.evl_net.eval()

    # Randomly assign the colours so each network plays both sides over time.
    if random.randint(0, 1) == 1:
        players = {
            BLACK: (MonteCarloTreeSearch(self.net), "net"),
            WHITE: (MonteCarloTreeSearch(self.evl_net), "eval"),
        }
    else:
        players = {
            WHITE: (MonteCarloTreeSearch(self.net), "net"),
            BLACK: (MonteCarloTreeSearch(self.evl_net), "eval"),
        }

    node = TreeNode()
    while True:
        _, next_node = players[self.board.c_player][0].search(
            self.board, node)
        self.board.move(next_node.action)

        if self.board.is_draw():
            result[0] += 1
            return

        if self.board.is_game_over():
            if players[self.board.c_player][1] == "net":
                result[1] += 1
            else:
                result[2] += 1
            return

        self.board.trigger()
        next_node.parent = None
        node = next_node
def play_against_human(model_file, human_plays_black):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = torch.load(model_file)
    conf = model['conf']

    # load the network
    network = ZetaGoNetwork(conf)
    network.load_state_dict(model['best_network'])
    network.to(device)

    # create an evaluator
    evaluator = DefaultEvaluator(network, device)

    # create a search tree
    root = TreeNode(None, None, evaluator, conf)

    gui = GUI(conf)
    human_turn = human_plays_black
    previous_action = None
    while True:
        if human_turn:
            # wait for the human player's action
            action = gui.wait_for_action(root.go)
        else:
            # calculate the computer's action
            gui.update_text('Computer is thinking...')

            # perform MCTS
            for _ in range(conf.NUM_SIMULATIONS):
                tree_search(root, evaluator, conf)

            # calculate the distribution of action selection
            # temperature tau -> 0
            m = max(root.n)
            p = [0 if x < m else 1 for x in root.n]
            s = sum(p)
            pi = np.array([x / s for x in p], dtype=np.float32)

            # choose an action
            action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action
        if root.children[action] is None:
            root.children[action] = \
                TreeNode(root, action, evaluator, conf)
        root = root.children[action]
        # release memory
        root.parent.children = None

        # update GUI
        gui.update_go(root.go)
        gui.update_text('Computer passes' if action == conf.PASS else '')

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            black_score, white_score = root.go.score()
            winner = 'BLACK' if black_score > white_score else 'WHITE'
            gui.update_text(f'{winner} wins, {black_score} : {white_score}')
            gui.freeze()

        previous_action = action
        human_turn = not human_turn
def self_play(evaluator, resign_threshold, conf):
    examples = []
    allow_resign = resign_threshold > -1.0 \
        and np.random.rand() >= conf.RESIGN_SAMPLE_RATE
    resign_value_history = None if allow_resign else []

    # result undecided
    result = 0.0

    # create a search tree
    root = TreeNode(None, None, evaluator, conf)

    previous_action = None
    t = 0
    while t < conf.MAX_GAME_LENGTH:
        # perform MCTS
        for _ in range(conf.NUM_SIMULATIONS):
            tree_search(root, evaluator, conf)

        # we follow AlphaGo's method to calculate the resignation value
        # notice that children with n = 0 are skipped by setting their
        # value to be -1.0 (w / n > -1.0 for children with n > 0)
        resign_value = max(
            map(lambda w, n: -1.0 if n == 0 else w / n, root.w, root.n))
        if not allow_resign:
            resign_value_history.append([resign_value, root.go.turn])
        elif -1.0 < resign_value <= resign_threshold:
            result = 1.0 if root.go.turn == WHITE else -1.0
            break

        # calculate the distribution of action selection
        # notice that illegal actions always have zero probability as
        # long as NUM_SIMULATIONS > 0
        if t < conf.EXPLORATION_TIME:
            # temperature tau = 1
            s = sum(root.n)
            pi = [x / s for x in root.n]
        else:
            # temperature tau -> 0
            m = max(root.n)
            p = [0 if x < m else 1 for x in root.n]
            s = sum(p)
            pi = [x / s for x in p]

        # save position, distribution of action selection and turn
        examples.append([
            extract_feature(root, conf),
            np.array(pi, dtype=np.float32),
            np.array([root.go.turn], dtype=np.float32),
        ])

        # choose an action
        action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action
        root = root.children[action]
        # release memory
        root.parent.children = None
        t += 1

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            break
        previous_action = action

    # calculate the scores if the result is undecided
    if result == 0.0:
        score_black, score_white = root.go.score()
        result = 1.0 if score_black > score_white else -1.0

    # update the game winner from the perspective of each player
    for i in range(len(examples)):
        examples[i][2] *= result

    return examples, resign_value_history, result
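# Illustration (not part of the original code): how the visit counts of the
# root's children are turned into a move-selection distribution in the
# functions above. With temperature tau = 1 the counts are simply normalised;
# with tau -> 0 the probability mass is shared uniformly among the
# most-visited moves. A minimal sketch with made-up visit counts:
import numpy as np

n = [0, 3, 12, 12, 5]                       # hypothetical visit counts

# tau = 1: proportional to visit counts
s = sum(n)
pi_explore = [x / s for x in n]             # [0.0, 0.09375, 0.375, 0.375, 0.15625]

# tau -> 0: uniform over the set of most-visited actions
m = max(n)
p = [0 if x < m else 1 for x in n]
pi_greedy = np.array([x / sum(p) for x in p], dtype=np.float32)  # [0, 0, 0.5, 0.5, 0]

action = np.random.choice(len(n), p=pi_greedy)  # samples index 2 or 3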
def play(self):
    mcts = MonteCarloTreeSearch(self.net)
    game = deepcopy(self.game)
    game_over = False
    value = 0
    node = TreeNode()
    valid = 0
    # self.game.colorBoard()
    game.print_board()

    while not game_over:
        if game.current_player == self.human_player:
            valid = False
            while not valid:
                piece, refpt, rot, flip = self.get_input(game)
                piece.create(0, (refpt[0], refpt[1]))
                f = 'None' if flip == 0 else 'h'
                piece.flip(f)
                piece.rotate(90 * rot)
                valid = game.valid_move(piece.points, self.human_player)
                if not valid:
                    print('You selected an illegal move, please reselect')
                    # print('attempting', piece.points)
                    # print('corners are ', game.corners[self.human_player])

            # Encode the (reference point, shift, rotation, flip) choice as a
            # single action index.
            if piece.ID not in ['I5', 'I4', 'I3', 'I2']:
                encoding = (refpt[0] * 14 + refpt[1]) * 91 + piece.shift + \
                    (rot // 90) * 2 + flip
            else:
                encoding = (refpt[0] * 14 + refpt[1]) * 91 + piece.shift + \
                    (rot // 90) * 1 + flip

            best_child = TreeNode()
            best_child.action = encoding
            print('CHOICE WAS MADE BY A HUMAN TO PLAY', piece.ID, '@', refpt)
        else:
            best_child = mcts.search(game, node, CFG.temp_final)

        action = best_child.action
        game.play_action(action)
        game.print_board()
        # game.colorBoard()

        game_over, value = game.check_game_over(game.current_player)

        best_child.parent = None
        node = best_child

    if value == self.human_player * game.current_player:
        print("You won!")
    elif value == -self.human_player * game.current_player:
        print("You lost.")
    else:
        print("Draw Match")
""" from mcts import encode_position, TreeNode from board import Position, make_board, empty_board from net import GobangModel p0 = Position(make_board(empty_board), 'a', 0, -1) p0.show() x = encode_position(p0) print x[10:15, 0, 0] print x[10:15, 0, 1] print x[10:15, 0, 2] p1 = p0.move(18) y = encode_position(p1) print y[10:15, 0, 0] print y[10:15, 0, 1] print y[10:15, 0, 2] p2 = p1.move(37) z = encode_position(p2) print z[10:15, 0, 0] print z[10:15, 0, 1] print z[10:15, 0, 2] net = GobangModel t0 = TreeNode(net, p0) print list(p0.moves()) p3 = p0.move(190) print p3
def mutual_play(network_black, network_white, device, conf):
    # create evaluators for both players
    evaluator_black = DefaultEvaluator(network_black, device)
    evaluator_white = DefaultEvaluator(network_white, device)

    # create search trees for both players
    root_black = TreeNode(None, None, evaluator_black, conf)
    root_white = TreeNode(None, None, evaluator_white, conf)

    # black player goes first
    root = root_black
    evaluator = evaluator_black

    previous_action = None
    t = 0
    while t < conf.MAX_GAME_LENGTH:
        # the player to move performs MCTS using its own network
        for _ in range(conf.NUM_SIMULATIONS):
            tree_search(root, evaluator, conf)

        # calculate the distribution of action selection
        # temperature tau -> 0
        m = max(root.n)
        p = [0 if x < m else 1 for x in root.n]
        s = sum(p)
        pi = np.array([x / s for x in p], dtype=np.float32)

        # choose an action
        action = np.random.choice(conf.NUM_ACTIONS, p=pi)

        # take the action in both search trees
        if root_black.children[action] is None:
            root_black.children[action] = \
                TreeNode(root_black, action, evaluator_black, conf)
        root_black = root_black.children[action]
        if root_white.children[action] is None:
            root_white.children[action] = \
                TreeNode(root_white, action, evaluator_white, conf)
        root_white = root_white.children[action]

        # release memory
        root_black.parent.children = None
        root_white.parent.children = None

        # switch to the other search tree
        if root.go.turn == BLACK:
            root = root_white
            evaluator = evaluator_white
        else:
            root = root_black
            evaluator = evaluator_black
        t += 1

        # game terminates when both players pass
        if previous_action is not None \
                and previous_action == conf.PASS \
                and action == conf.PASS:
            break
        previous_action = action

    score_black, score_white = root.go.score()
    return score_black > score_white