def expand(self, node):
    # call on an expandable node to create one more child node -- we add the child
    # to the leaf, then update the current node we are at to this leaf node

    # choose an untried action uniformly at random
    action_index = randint(0, len(node.untried_actions) - 1)
    action = node.untried_actions[action_index]

    # remove that action from the untried-action list
    node.untried_actions.remove(action)

    # create the new node to be added to the game tree
    child = self.create_node(node.board, Board.get_opp_piece_type(node.colour), action, node)

    # apply the move to that child node -- the parent applies its move to the child's board
    child.board.update_board(action, node.colour)
    child.update_actions()

    # add this child to the parent's child list
    node.add_child(child)
    self.num_nodes += 1

    return child
def simulate(self, node):
    # simulate the game from this node's state by playing random moves until a
    # terminal state is reached; work on a copy so the tree's board is untouched
    board = deepcopy(node.board)

    available_actions = board.update_actions(board, node.colour)
    colour = node.colour

    while board.is_terminal() is False:
        if len(available_actions) == 0:
            # there are no actions to take and therefore it is a forfeit
            action = None
        else:
            # pick a random action
            action_ind = randint(0, len(available_actions) - 1)
            action = available_actions[action_ind]

        # apply the action to the board
        board.update_board(action, colour)

        # it is now the other player's turn
        colour = Board.get_opp_piece_type(colour)

        # update the available-actions list -- these are the actions of the
        # player that will make the next move
        available_actions = board.update_actions(board, colour)

    # we are at a terminal state: score it from this node's perspective
    if board.winner == node.colour:
        return 1
    elif board.winner == Board.get_opp_piece_type(node.colour):
        return -1
    else:
        # no winner recorded -- a draw
        return 0
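The expand and simulate methods above implement two of the four phases of Monte Carlo tree search (selection, expansion, simulation, backpropagation). For context, a minimal sketch of the remaining backpropagation phase is shown below; the visits, wins, and parent attributes are assumptions about the node class, not taken from this code.

def backpropagate(self, node, result):
    # walk from the simulated leaf back up to the root, updating statistics;
    # `result` is +1 / 0 / -1 from the perspective of node.colour, so it is
    # negated at each level as the player to move alternates
    while node is not None:
        node.visits += 1      # assumed visit counter on the node class
        node.wins += result   # assumed reward accumulator on the node class
        result = -result      # flip perspective for the parent
        node = node.parent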
class Player:
    def __init__(self, colour):
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE

        # each player's internal board representation
        self.board = Board()
        self.opponent = self.board.get_opp_piece_type(self.colour)

    def update(self, action):
        # update the board based on the action of the opponent
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(action, self.opponent)
        elif self.board.phase == constant.MOVING_PHASE:
            if isinstance(action[0], tuple) is False:
                raise InvalidAction

            direction = self.board.convert_coord_to_direction(action[0], action[1])
            self.board.update_board((action[0], direction), self.opponent)

    def action(self, turns):
        # list the legal actions and prompt the user to choose one by index
        available_actions = self.board.update_actions(self.colour)
        available_actions.sort()

        for i, action in enumerate(available_actions):
            print(str(i) + " : " + str(action))
        print("+" * 50)

        index = int(input("Enter move for {}: ".format(self.colour)))
        next_move = available_actions[index]
        print("+" * 50)
        print(self.board.move_counter)

        if self.board.phase == constant.PLACEMENT_PHASE:
            # making a move during the placement phase
            self.board.update_board(next_move, self.colour)
            return next_move
        else:
            # making a move during the moving phase -- convert the
            # (position, direction) pair into the destination coordinate
            new_pos = self.board.convert_direction_to_coord(next_move[0], next_move[1])
            self.board.update_board(next_move, self.colour)
            return next_move[0], new_pos
def __init__(self, board, colour):
    self.tt = TranspositionTable()

    # only use this board to complete the search -- a copy saves us from
    # mutating the game's real board
    self.board = deepcopy(board)

    # for alpha-beta search -- instead of passing these into the function
    # calls we can store them on the instance
    self.alpha = -inf
    self.beta = inf

    # defines the colours of min and max
    self.player = colour
    self.opponent = Board.get_opp_piece_type(self.player)

    # default depth
    self.depth = inf

    # default move ordering with iterative deepening
    self.actions_evaluated = []
    self.actions_leftover = []

    # data structures for machine learning
    self.eval_depth = 0
    self.minimax_val = 0
    self.policy_vector = []

    # dictionary storing the available moves of the board
    self.available_actions = {
        constant.WHITE_PIECE: {},
        constant.BLACK_PIECE: {}
    }

    self.undo_effected = []

    # timing bookkeeping for the iterative-deepening time limit
    self.time_alloc = 0
    self.time_rem = 0
    self.time_start = 0
    self.time_end = 0
    self.total_time = 0

    # load the evaluation function based on the colour of the player
    if self.player == constant.WHITE_PIECE:
        self.evaluation = Evaluation("./XML", "/white_weights")
    else:
        self.evaluation = Evaluation("./XML", "/black_weights")
class Player:
    def __init__(self, colour):
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE

        # each player's internal board representation
        self.board = Board()
        self.opponent = self.board.get_opp_piece_type(self.colour)

    def update(self, action):
        # update the board based on the action of the opponent
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(action, self.opponent)
        elif self.board.phase == constant.MOVING_PHASE:
            if isinstance(action[0], tuple) is False:
                raise InvalidAction

            direction = self.board.convert_coord_to_direction(action[0], action[1])
            self.board.update_board((action[0], direction), self.opponent)

    def action(self, turns):
        # pick a legal action uniformly at random
        available_actions = self.board.update_actions(self.colour)
        next_action = available_actions[randint(0, len(available_actions) - 1)]

        if self.board.phase == constant.PLACEMENT_PHASE:
            # making a move during the placement phase
            self.board.update_board(next_action, self.colour)
            return next_action
        else:
            # making a move during the moving phase -- convert the
            # (position, direction) pair into the destination coordinate
            new_pos = self.board.convert_direction_to_coord(next_action[0], next_action[1])
            self.board.update_board(next_action, self.colour)
            return next_action[0], new_pos
def negascout(self, depth, alpha, beta, colour):
    # timeout handling
    self.time_end = self.curr_millisecond_time()
    if self.time_end - self.time_start > self.time_rem:
        raise TimeOut

    opponent = Board.get_opp_piece_type(colour)
    original_alpha = alpha
    dic = {self.player: 1, self.opponent: -1}
    move_to_try = None

    # check if the current board state is in the transposition table
    board_str = self.board.board_state.decode("utf-8")
    key = self.tt.contains(board_str, colour, phase=self.board.phase)
    if key is not None:
        board_str = key[0]
        entry = self.tt.get_entry(board_str, colour)
        tt_value = entry[0]
        tt_type = entry[1]
        tt_best_move = entry[2]
        tt_depth = entry[3]

        # if we have found an entry in the transposition table, the stored
        # best move is the move we should try first
        move_to_try = tt_best_move

        if tt_depth >= depth:
            if tt_type == constant.TT_EXACT:
                # exact (PV) value -- no further search needed at this depth
                return tt_value, tt_best_move
            elif tt_type == constant.TT_LOWER:
                # the stored value is a lower bound -- it can raise alpha
                if tt_value > alpha:
                    alpha = tt_value
            elif tt_type == constant.TT_UPPER:
                # the stored value is an upper bound -- it can lower beta
                if tt_value < beta:
                    beta = tt_value

            if alpha >= beta:
                return tt_value, tt_best_move

    # generate and order the legal actions
    actions = self.board.update_actions(colour)
    actions = self.board.sort_actions(actions, colour)

    # terminal test -- default case
    if self.cutoff_test(depth):
        val = self.evaluate_state(self.board, self.player, actions) * dic[colour]
        return val, None

    best_val = -inf
    best_action = None

    if move_to_try is not None and move_to_try in actions:
        # put the move to try at the first position -- therefore it will be searched first
        actions = [move_to_try] + actions

    # trim the action list to reduce the branching factor
    if len(actions) <= 12:
        favourable = actions
    elif 12 < len(actions) < 20:
        favourable = actions[:12]
    else:
        favourable = actions[:len(actions) // 2]

    # start negascout here
    for i, action in enumerate(favourable):
        # skip over the duplicated best action from the transposition table
        if action == move_to_try and i > 0:
            continue

        elim = self.board.update_board(action, colour)

        if i == 0:
            # the first node is the best we have found so far, so do a
            # full-window search on it
            score, _ = self.negascout(depth - 1, -beta, -alpha, opponent)
            score = -score
        else:
            # assume the first move is the best move found so far; test that
            # assumption with a null-window search on each remaining move --
            # if the search fails low, the first move really was better
            score, _ = self.negascout(depth - 1, -alpha - 1, -alpha, opponent)
            score = -score

            # if the search failed high -- i.e. the score lies between alpha
            # and beta -- we must re-search with a full window to obtain the
            # exact minimax value
            if alpha < score < beta:
                score, _ = self.negascout(depth - 1, -beta, -score, opponent)
                score = -score

        # keep the best value and action found so far
        if best_val < score:
            best_val = score
            best_action = action

        # raise alpha if needed
        if alpha < score:
            alpha = score

        # undo the action applied to the board -- we can now try another move
        self.undo_actions(action, colour, elim)

        # test for an alpha-beta cutoff
        if alpha >= beta:
            break

    # classify the value for the transposition table
    if best_val <= original_alpha:
        # fail-low: the value is an upper bound
        tt_type = constant.TT_UPPER
    elif best_val >= beta:
        # fail-high: the value is a lower bound
        tt_type = constant.TT_LOWER
    else:
        # exact (PV) value
        tt_type = constant.TT_EXACT

    # add the entry to the transposition table
    self.tt.add_entry(self.board.board_state, colour, best_val, tt_type, best_action, depth)

    return best_val, best_action
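The null-window search above exploits the fact that a zero-width window (alpha, alpha + 1) is just enough to test whether a move can beat the current best; only a fail-high forces a full re-search. The method is presumably driven by iterative deepening, since the player classes below call itr_negascout. A minimal sketch of such a driver, assuming the timing attributes from the constructor above and that self.time_rem has already been set by the caller's time-allocation logic:

def itr_negascout(self):
    # iterative deepening: search depth 1, 2, 3, ... until time runs out;
    # the transposition table makes each re-search cheap and supplies good
    # move ordering for the next, deeper iteration
    self.time_start = self.curr_millisecond_time()
    best_action = None
    depth = 1
    try:
        while depth <= self.depth:
            val, action = self.negascout(depth, -inf, inf, self.player)
            if action is not None:
                best_action = action     # keep the deepest completed result
                self.eval_depth = depth
                self.minimax_val = val
            depth += 1
    except TimeOut:
        # the partially searched depth is discarded; fall back to the best
        # action from the last fully completed iteration
        pass
    return best_action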
def set_player_colour(self, colour):
    self.player = colour
    self.opponent = Board.get_opp_piece_type(colour)
class Player:
    def __init__(self, colour):
        # set the colour of the player
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE

        # each player's internal board representation
        self.board = Board()

        # set up the minimax search strategy -- NEGAMAX
        self.minimax = Negamax(self.board, self.colour, "/eval_weights")

        # set the colour of the opponent
        self.opponent = self.board.get_opp_piece_type(self.colour)

        # set up the minimax return values
        self.depth_eval = 0
        self.minimax_val = 0
        self.policy_vector = 0

        # initialise the action book
        self.action_book = ActionBook(self.colour)

    def update(self, action):
        # update the board based on the action of the opponent
        if self.board.phase == constant.PLACEMENT_PHASE:
            # update_board also handles the pieces that will be eliminated
            self.board.update_board(action, self.opponent)
            self.minimax.update_board(self.board)
        elif self.board.phase == constant.MOVING_PHASE:
            if isinstance(action[0], tuple) is False:
                print("ERROR: action is not a tuple")
                return

            # get the direction of the move from the provided positions
            move_type = self.board.convert_coord_to_direction(action[0], action[1])

            # update the player's board representation with the action
            self.board.update_board((action[0], move_type), self.opponent)

    def action(self, turns):
        # update the negamax/minimax board representation
        self.minimax.update_board(self.board)

        # reset the move counter of the board when the moving phase begins
        if turns == 0 and self.board.phase == constant.MOVING_PHASE:
            self.board.move_counter = 0
            self.board.phase = constant.MOVING_PHASE

        # check the action book to see if the current state is known
        board_state = self.board.board_state
        if self.board.phase == constant.PLACEMENT_PHASE:
            action = self.action_book.check_state(board_state)

            # check that the action is legal before playing it
            if action is not None and self.board.check_free_square(action) is True:
                # return the action found and update the board representations
                self.board.update_board(action, self.colour)
                self.minimax.update_board(self.board)
                return action

        # no state was found in the action book, so fall back to a negamax search
        best_move = self.minimax.itr_negamax()

        self.depth_eval = self.minimax.eval_depth
        self.minimax_val = self.minimax.minimax_val

        # once we have found the best move we must apply it to the board representation
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return best_move
        else:
            # a best move of None is a forfeit
            if best_move is None:
                self.board.update_board(best_move, self.colour)
                self.minimax.update_board(self.board)
                return None

            # in the moving phase, return the correctly formatted positions
            new_pos = Board.convert_direction_to_coord(best_move[0], best_move[1])
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return best_move[0], new_pos
def negamax(self, depth, alpha, beta, colour):
    # timeout handling
    self.time_end = self.curr_millisecond_time()
    if self.time_end - self.time_start > self.time_rem:
        raise TimeOut

    opponent = Board.get_opp_piece_type(colour)
    original_alpha = alpha
    dic = {self.player: 1, self.opponent: -1}
    move_to_try = None

    # check if the current board state is in the transposition table
    board_str = self.board.board_state.decode("utf-8")
    key = self.tt.contains(board_str, colour, phase=self.board.phase)
    if key is not None:
        # get the value mappings from the table
        board_str = key[0]
        entry = self.tt.get_entry(board_str, colour)
        tt_value = entry[0]
        tt_type = entry[1]
        tt_best_move = entry[2]
        tt_depth = entry[3]

        # if we have found an entry in the transposition table, the stored
        # best move is the move we should try first
        move_to_try = tt_best_move

        if tt_depth >= depth:
            if tt_type == constant.TT_EXACT:
                # this is the PV node, i.e. the best move found so far
                return tt_value, tt_best_move
            elif tt_type == constant.TT_LOWER:
                # the stored minimax value is a lower bound on the search
                if tt_value > alpha:
                    alpha = tt_value
            elif tt_type == constant.TT_UPPER:
                # the stored value corresponds to a beta cutoff and is
                # therefore an upper bound
                if tt_value < beta:
                    beta = tt_value

            # test for cutoff -- return the best move found so far
            if alpha >= beta:
                return tt_value, tt_best_move

    # obtain the actions and sort them
    actions = self.board.update_actions(colour)
    actions = self.board.sort_actions(actions, colour)

    # terminal test -- default case
    if self.cutoff_test(depth):
        val = self.evaluate_state(self.board, self.player, actions) * dic[colour]
        return val, None

    # do the negamax search
    best_val = -inf
    best_action = None

    # if the transposition table suggested a best action, put it at the start
    # of the list so it is searched first
    if move_to_try is not None and move_to_try in actions:
        actions = [move_to_try] + actions

    i = 0

    # split the action list and only search the favourable actions when the
    # list is long; the aim is to reduce the branching factor as much as we
    # can while keeping enough moves to preserve some of negamax's optimal
    # decision making, rather than falling back to a purely greedy approach
    if len(actions) <= 12:
        favourable = actions
    elif 12 < len(actions) < 20:
        favourable = actions[:12]
    else:
        favourable = actions[:len(actions) // 2]

    for action in favourable:
        # skip the duplicated tt best action -- it has already been searched
        if action == move_to_try and i != 0:
            continue
        i += 1

        # update the board, recording the pieces eliminated by that update
        elim = self.board.update_board(action, colour)

        score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
        score = -score

        # undo the action applied to the board
        self.undo_action(action, colour, elim)

        # keep the best score and action so far
        if score > best_val:
            best_val = score
            best_action = action

        # update alpha if needed
        if best_val > alpha:
            alpha = best_val

        # test for cutoff
        if alpha >= beta:
            break

    # classify the value for the transposition table
    if best_val <= original_alpha:
        # fail-low: the value is an upper bound
        tt_type = constant.TT_UPPER
    elif best_val >= beta:
        # fail-high: the value is a lower bound
        tt_type = constant.TT_LOWER
    else:
        # this is an exact (PV) value
        tt_type = constant.TT_EXACT

    # add the entry to the transposition table
    self.tt.add_entry(self.board.board_state, colour, best_val, tt_type, best_action, depth)

    return best_val, best_action
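For reference, a transposition table consistent with the calls above (contains returning a key tuple whose first element is the board string, get_entry returning a (value, type, best_move, depth) tuple, and an always-replace add_entry) could be as simple as the following sketch; the dictionary layout is inferred from usage and is an assumption, not this project's actual implementation.

class TranspositionTable:
    # maps (board_string, colour) -> (value, node_type, best_move, depth)
    def __init__(self):
        self.table = {}

    def contains(self, board_str, colour, phase=None):
        # return the key if this state has been seen before, else None
        key = (board_str, colour)
        return key if key in self.table else None

    def get_entry(self, board_str, colour):
        return self.table[(board_str, colour)]

    def add_entry(self, board_state, colour, value, node_type, best_move, depth):
        # board_state is assumed to be bytes, matching the .decode("utf-8")
        # calls in the search; always-replace scheme: the newest entry wins
        key = (board_state.decode("utf-8"), colour)
        self.table[key] = (value, node_type, best_move, depth)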
class Player:
    def __init__(self, colour):
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE

        self.available_moves = []

        # each player's internal board representation
        self.board = Board()

        # TODO -- need to see if this works correctly
        self.minimax = Negascout(self.board, self.colour)

        self.opponent = self.board.get_opp_piece_type(self.colour)
        self.depth_eval = 0
        self.minimax_val = 0
        self.policy_vector = 0

    def update(self, action):
        # update the board based on the action of the opponent
        if self.board.phase == constant.PLACEMENT_PHASE:
            # update_board also handles the pieces that will be eliminated
            self.board.update_board(action, self.opponent)
            self.minimax.update_board(self.board)
        elif self.board.phase == constant.MOVING_PHASE:
            if isinstance(action[0], tuple) is False:
                print("ERROR: action is not a tuple")
                return

            direction = self.board.convert_coord_to_direction(action[0], action[1])

            # update the player's board representation with the action
            self.board.update_board((action[0], direction), self.opponent)
            self.minimax.update_board(self.board)

    def action(self, turns):
        self.minimax.update_board(self.board)

        # reset the move counter when the moving phase begins
        if turns == 0 and self.board.phase == constant.MOVING_PHASE:
            self.board.move_counter = 0
            self.board.phase = constant.MOVING_PHASE

        # find the best move
        best_move = self.minimax.itr_negascout()

        # if the best move found so far is a forfeit, return None
        if best_move is None:
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return None

        self.depth_eval = self.minimax.eval_depth
        self.minimax_val = self.minimax.minimax_val

        # once we have found the best move we must apply it to the board representation
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return best_move
        else:
            new_pos = Board.convert_direction_to_coord(best_move[0], best_move[1])
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return best_move[0], new_pos
def negamax(self, depth, alpha, beta, colour):
    # timeout handling
    self.time_end = self.curr_millisecond_time()
    if self.time_end - self.time_start > self.time_rem:
        raise TimeOut

    opponent = Board.get_opp_piece_type(colour)

    # sign of the evaluation from the current player's perspective
    dic = {self.player: 1, self.opponent: -1}

    # generate the legal actions
    actions = self.board.update_actions(colour)

    # terminal test -- default case
    if self.cutoff_test(depth):
        val = self.evaluate_state(self.board, self.player, actions) * dic[colour]
        return val, None

    # do the negamax search
    best_val = -inf
    best_action = None

    for action in actions:
        # apply the action, recording any eliminated pieces so it can be undone
        elim = self.board.update_board(action, colour)

        score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)

        # undo the action and negate the score back to this player's perspective
        self.undo_action(action, colour, elim)
        score = -score

        if score > best_val:
            best_val = score
            best_action = action

        if score > alpha:
            alpha = score

        if alpha >= beta:
            break

    return best_val, best_action
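The sign flip score = -score is what lets a single routine serve both players: in a zero-sum game, the value of a position to one player is the negation of its value to the other, and the (alpha, beta) window is negated and swapped for the same reason. A self-contained toy example, independent of this project, that can be run to confirm the identity against a hand-computed minimax value:

from math import inf

def negamax_toy(values, node=0, alpha=-inf, beta=inf, sign=1):
    # perfect binary tree stored in an array; internal slots are padding and
    # leaves hold terminal scores from the maximising player's point of view
    left, right = 2 * node + 1, 2 * node + 2
    if left >= len(values):
        # leaf: report the score from the perspective of the player to move
        return sign * values[node]
    best = -inf
    for child in (left, right):
        # negate the child's value and negate-and-swap the window
        best = max(best, -negamax_toy(values, child, -beta, -alpha, -sign))
        alpha = max(alpha, best)
        if alpha >= beta:
            break  # alpha-beta cutoff
    return best

# root is max, its children are min: the minimax value of this tree is
# max(min(3, 5), min(2, 9)) == 3, which negamax reproduces
assert negamax_toy([0, 0, 0, 3, 5, 2, 9]) == 3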