def game_step():
    """This function is called every time a player/AI wants to play a move
    on the board; the position passed is in [0, 63] where 0 is the bottom
    right corner

    Returns
    -------
    json : json
        black_board, white_board, legal_moves, player, done
    """
    global board_state, board_legal_moves
    # play the move chosen by the human
    pos = int(request.form['position'])
    # the ai steps if pos is -1
    if (pos == -1):
        a = ai_player_move(board_state, board_legal_moves)
    else:
        # human player move
        a = 1 << pos
    board_state, board_legal_moves, player, done = env.step(board_state, a)
    # return the new states
    return jsonify(black_board=get_set_bits_list(board_state[0]),
                   white_board=get_set_bits_list(board_state[1]),
                   legal_moves=get_set_bits_list(board_legal_moves),
                   player=player, done=done)
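# The ai_player_move helper called above is not part of this excerpt. A
# minimal sketch, under the assumption that a module level agent object
# (here called ai_agent, a placeholder name) exposes the
# move(board, legal_moves) interface used by the player classes below;
# the actual implementation may differ.
def ai_player_move(board_state, board_legal_moves):
    """Hypothetical wrapper that asks the configured agent for a move."""
    # returns a 64 bit bitboard mask (0 if no legal move is available)
    return ai_agent.move(board_state, board_legal_moves)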
def __init__(self, s, legal_moves, m=-1, terminal=0, parent=None):
    """Initializer for the node class; the tree is a collection of nodes

    Parameters
    ----------
    s : tuple
        the board state represented as bitboards
    legal_moves : int (64 bit)
        bits corresponding to legal positions are set to 1 in this int
    m : int (optional)
        the move played to get to this state; this denotes the position
        from the right end of the bitboard (not the 64 bit mask)
    terminal : int (optional)
        flag denoting whether this is a leaf node
    parent : int (optional)
        index of the parent of this node in the mcts node list
    """
    self.state = s
    self.legal_moves = legal_moves
    # convert the 64 bit legal moves into a list of positions
    # for fast use later
    self.legal_moves_set = get_set_bits_list(legal_moves)
    np.random.shuffle(self.legal_moves_set)
    # to compare whether all children have been added or not
    self.total_legal_moves = get_total_set_bits(legal_moves)
    self.w = 0
    self.n = 0
    self.N = 0
    self.children = []
    # since we have shuffled the legal_moves_set, we can use total_children
    # as the idx from where we have to pick the next unexplored move
    self.total_children = 0
    self.move = m
    self.terminal = terminal
    self.parent = parent
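# The tree code below calls add_child and get_ucb1 on these Node objects;
# those methods are not shown in this excerpt. A minimal sketch using the
# standard UCB1 formula w/n + c * sqrt(ln(N) / n) over the statistics
# initialized above; the project's own version may differ.
def add_child(self, idx):
    """Register a child by its index in the mcts node list."""
    self.children.append(idx)
    self.total_children += 1

def get_ucb1(self, c):
    """Standard UCB1 score: mean reward plus an exploration bonus."""
    if (self.n == 0):
        # unvisited nodes are explored first
        return np.inf
    return self.w / self.n + c * np.sqrt(np.log(self.N) / self.n)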
def move(self, board, legal_moves, value=None):
    """Get the action with the maximum Q value

    Parameters
    ----------
    board : Numpy array
        The board state on which to calculate the best action
    legal_moves : int (64 bit)
        bits corresponding to legal positions are set to 1
    value : None, optional
        Kept for consistency with other agent classes

    Returns
    -------
    a : int (64 bit)
        bitboard representing the selected position to play
    """
    # with probability 1 - epsilon, use the agent model to make the
    # predictions and pick the best legal move
    if np.random.random() > self.epsilon and legal_moves:
        model_outputs = self._get_model_outputs(board, self._model)[0]
        legal_moves = self._converter.convert(legal_moves,
                               input_format='bitboard_single',
                               output_format='ndarray')\
                          .reshape((1, -1))[0]
        # mask out illegal positions and take the argmax;
        # ndarray index i corresponds to bitboard position 63 - i
        return 1 << int(63 - np.argmax(
                   np.where(legal_moves == 1, model_outputs, -np.inf)))
    else:
        # otherwise explore by playing a random legal move
        if (not legal_moves):
            return 0
        return 1 << get_random_move_from_list(
                        get_set_bits_list(legal_moves))
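# The bitboard helpers used throughout (get_set_bits_list,
# get_total_set_bits) are external to this excerpt. A minimal sketch,
# assuming positions are counted from the least significant bit; the
# actual utilities may be implemented differently.
def get_set_bits_list(x):
    """Return the list of positions (from the LSB) whose bits are set."""
    positions = []
    pos = 0
    while (x):
        if (x & 1):
            positions.append(pos)
        x >>= 1
        pos += 1
    return positions

def get_total_set_bits(x):
    """Return the number of set bits in the 64 bit integer x."""
    return bin(x).count('1')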
def coin_choice():
    """This function is called after the player has chosen which coin to
    play with; the environment is reset here and the board data is
    returned in json format

    Returns
    -------
    json : json
        black_board, white_board, legal_moves, player, done (if game
        ended or not), ai_player_coin (0/1), score_display_html
    """
    global board_state, board_legal_moves, ai_player_coin, env
    # get the color in the ajax call and reset the board accordingly
    c = request.form['color']
    # set the ai_player color accordingly
    if (c == 'white'):
        ai_player_coin = 0
    elif (c == 'black'):
        ai_player_coin = 1
    else:
        # c == 'random'
        if (random() < 0.5):
            ai_player_coin = 0
        else:
            ai_player_coin = 1
    # reset the environment
    done = 0
    board_state, board_legal_moves, player = env.reset()
    # read the html to render for score display
    with open('templates/score_display.html', 'r') as f:
        score_display_html = f.read()
    # append the reset button to the html
    with open('templates/reset.html', 'r') as f:
        score_display_html += f.read()
    # modify this html if necessary
    if (ai_player_coin == 1):
        score_display_html = score_display_html\
            .replace('AI (Black)', 'AI (White)')\
            .replace('You (White)', 'You (Black)')
    # return the boards and other data, html
    return jsonify(black_board=get_set_bits_list(board_state[0]),
                   white_board=get_set_bits_list(board_state[1]),
                   legal_moves=get_set_bits_list(board_legal_moves),
                   player=player, done=done,
                   ai_player_coin=ai_player_coin,
                   score_display_html=score_display_html)
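# game_step and coin_choice read request.form and return jsonify
# responses, i.e. they are Flask view functions. A minimal sketch of how
# they might be wired up; the route names and app setup here are
# assumptions, not taken from the source.
from flask import Flask, request, jsonify

app = Flask(__name__)
app.add_url_rule('/coin_choice', view_func=coin_choice, methods=['POST'])
app.add_url_rule('/game_step', view_func=game_step, methods=['POST'])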
def move(self, s, legal_moves):
    """Select a move randomly, given the board state and the set of
    legal moves

    Parameters
    ----------
    s : tuple
        contains black and white bitboards and current player
    legal_moves : int (64 bit)
        legal states are set to 1

    Returns
    -------
    a : int (64 bit)
        bitboard representing position to play
    """
    if (not legal_moves):
        return 0
    return 1 << get_random_move_from_list(get_set_bits_list(legal_moves))
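# get_random_move_from_list is another external helper; a minimal
# sketch, assuming it simply picks one position uniformly at random
# from the given list of positions.
import numpy as np

def get_random_move_from_list(move_list):
    """Pick one position uniformly at random from a list of positions."""
    return int(np.random.choice(move_list))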
def train(self, n=100):
    """Train the MCTS tree for n iterations

    Parameters
    ----------
    n : int (optional)
        the number of simulation steps to run
    """
    while (n):
        n -= 1
        ##############################
        ####### Selection Phase ######
        ##############################
        """select a node in the tree that is neither a leaf node
        nor fully explored"""
        e = 0
        while (True):
            node = self._node_list[e]
            if (node.total_legal_moves != node.total_children or
                    node.terminal == 1):
                # at least one unexplored move is present, stop the
                # selection here
                break
            else:
                # since all moves of this node were explored at least
                # once, we go one level down and select the child
                # with the highest ucb1
                next_node = None
                best_ucb1 = -np.inf
                for idx in node.children:
                    ucb1 = self._node_list[idx].get_ucb1(self._c)
                    if (ucb1 > best_ucb1):
                        best_ucb1 = ucb1
                        next_node = idx
                e = next_node
        # node defaults to the root in case the else branch never runs
        node, node_idx = self._node_list[e], e

        ##############################
        ####### Expansion Phase ######
        ##############################
        """select one of the unexplored child nodes of this node"""
        if (not node.terminal):
            # get the next move which has not been added to the mcts
            # tree yet; legal_moves_set was shuffled at initialization,
            # so total_children indexes the next unexplored move
            m = node.legal_moves_set[node.total_children]
            # play the move and add the new node to the tree (node list)
            next_state, next_legal_moves, _, done = \
                self._env.step(node.state, 1 << m)
            node = Node(s=next_state.copy(), legal_moves=next_legal_moves,
                        m=m, terminal=done, parent=e)
            # add the node to the node list
            self._node_list.append(node)
            # add its index to the parent's children list
            self._node_list[e].add_child(len(self._node_list) - 1)
            node_idx = len(self._node_list) - 1

        ##############################
        ###### Simulation Phase ######
        ##############################
        """play till the end by randomly selecting moves, starting from
        the newly created node (skipped for terminal nodes)"""
        s = node.state
        legal_moves = node.legal_moves
        if (node.terminal != 1):
            done = 0
            while (not done):
                a = get_random_move_from_list(
                        get_set_bits_list(legal_moves))
                s, legal_moves, _, done = self._env.step(s, 1 << a)
        winner = self._env.get_winner(s)

        ##############################
        #### Backpropagation Phase ###
        ##############################
        """backpropagate the winner value from the node where the rollout
        started up to the root, updating the statistics of each node"""
        while (True):
            node.n += 1
            # update the value of N in the children
            for c in node.children:
                self._node_list[c].N = node.n
            if (winner != -1):
                node.w += ((1 - winner) ==
                           self._env.get_player(node.state))
            else:
                # tie
                node.w += 0.5
            # move one level up
            if (node.parent is None):
                break
            else:
                node, node_idx = self._node_list[node.parent], node.parent
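# A sketch of how a caller might use the trained tree to actually pick a
# move: selecting the most visited root child is a common MCTS policy,
# but the selection rule and the node_list / root layout assumed here
# are not confirmed by this excerpt.
def best_move(node_list):
    """Return the 64 bit mask of the most visited child of the root."""
    root = node_list[0]
    best_child = max((node_list[i] for i in root.children),
                     key=lambda c: c.n)
    return 1 << best_child.move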
def move(self, s, legal_moves, current_depth=0, get_max=1,
         alpha=-np.inf, beta=np.inf):
    """Select a move using minimax search with alpha-beta pruning,
    given the board state and the set of legal moves

    Parameters
    ----------
    s : tuple
        contains black and white bitboards and current player
    legal_moves : int (64 bit)
        legal states are set to 1
    current_depth : int
        tracks the depth in the recursion
    get_max : int
        denotes whether to play as the maximizing or minimizing player;
        only useful when recursion depth > 1, 1 is the max player and
        0 is the min player
    alpha : int
        tracks the maximum among all the nodes, useful for pruning
    beta : int
        tracks the minimum among all the nodes, useful for pruning

    Returns
    -------
    a : int (64 bit)
        bitboard representing position to play (at depth 0); deeper
        recursion levels return the alpha/beta value instead
    """
    # record which player the search is being run for
    if (current_depth == 0):
        self._player = self._env.get_player(s)
    # get the indices of the legal moves
    move_list = get_set_bits_list(legal_moves)
    h_list = []
    m_list = []
    for m in move_list:
        s_next, legal_moves, _, done = self._env.step(s, 1 << m)
        if (current_depth < self._depth and not done):
            # recurse, switching between the max and min player
            h_list.append(
                self.move(s_next, legal_moves, current_depth + 1,
                          1 - get_max, alpha, beta))
        else:
            # depth limit reached or game over, evaluate the heuristic
            h_list.append(
                self._board_heuristics(legal_moves, get_max, s_next))
        m_list.append(m)
        # adjust alpha and beta and prune if possible
        if (get_max):
            alpha = max(alpha, h_list[-1])
        else:
            beta = min(beta, h_list[-1])
        if (beta <= alpha):
            break
    # return the best move at the root, otherwise the alpha/beta value
    if (current_depth == 0):
        return 1 << m_list[np.argmax(h_list)]
    if (get_max):
        return alpha
    else:
        return beta
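# A sketch of how any of these agents could be plugged into a self-play
# loop with the environment interface used above (reset/step/get_winner);
# the env and agent objects here are placeholders, not code from the
# source.
def self_play(env, agent):
    """Play one full game with a single agent choosing every move."""
    s, legal_moves, player = env.reset()
    done = 0
    while (not done):
        a = agent.move(s, legal_moves)  # 64 bit mask, 0 means no move
        s, legal_moves, player, done = env.step(s, a)
    return env.get_winner(s)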