# Standard-library imports used throughout these searchers.
from copy import deepcopy
from math import inf
from time import time

# Project-specific imports (assumed module layout -- these names are taken
# from how they are used below, not from a known file structure):
# from board import Board
# from evaluation import Evaluation
# from transposition_table import TranspositionTable
# from errors import TimeOut, ReturnUnfavourableMove
# import constant


class Negascout(object):

    def __init__(self, board, colour):
        # transposition table for this searcher
        self.tt = TranspositionTable()

        # search on a deep copy of the board so the search cannot corrupt the
        # real game state (and so one copy is reused, to save memory)
        self.board = deepcopy(board)

        # alpha-beta bounds stored on the object instead of being threaded
        # through every call
        self.alpha = -inf
        self.beta = inf

        # colours of the maximising and minimising players
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0
        self.total_time = 0

        # load the evaluation function based on the colour of the player
        if self.player == constant.WHITE_PIECE:
            self.evaluation = Evaluation("./XML", "/white_weights")
        else:
            self.evaluation = Evaluation("./XML", "/black_weights")

    '''
    Iterative-deepening Negascout driver function.
    '''
    def itr_negascout(self):
        colour = self.player

        if self.board.phase == constant.PLACEMENT_PHASE:
            # clear the transposition table every time we evaluate a move in
            # the placement phase -- this limits how large the table can grow
            self.tt.clear()
            # set the maximum iterative-deepening depth based on the phase
            MAX_ITER = 5
        else:
            MAX_ITER = 11

        # update the root piece counts every time we search from a new node
        self.board.root_num_black = len(self.board.black_pieces)
        self.board.root_num_white = len(self.board.white_pieces)

        # default policy
        available_actions = self.board.update_actions(colour)
        action_set = set(available_actions)

        if len(available_actions) == 0:
            return None

        # time allocated per move, in milliseconds
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = 1500
        else:
            self.time_alloc = 1200

        # if we are running low on total time, or the game is dragging on,
        # spend less time per move
        if self.total_time > 90000 or self.board.move_counter > 120:
            self.time_alloc = 500

        # near the final shrinking phase we can decrease the time further
        if self.board.move_counter > 150:
            self.time_alloc = 150

        best_depth = 1
        val, move = 0, None
        best_move = None
        self.time_rem = self.time_alloc

        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            print(self.tt.size)
            print(depth)
            try:
                self.time_start = self.curr_millisecond_time()
                val, move = self.negascout(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()
                self.time_rem = self.time_alloc - (self.time_end - self.time_start)
                print(move)
                best_depth += 1

                # only accept moves that are legal at the root
                if move is not None and move in action_set:
                    best_move = move
            except TimeOut:
                print("TIMEOUT")
                break

        # add the allocated time to the total time used so far
        self.total_time += self.time_alloc
        self.eval_depth = best_depth
        return best_move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    def negascout(self, depth, alpha, beta, colour):
        # timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        original_alpha = alpha
        dic = {self.player: 1, self.opponent: -1}
        move_to_try = None

        # check if the current board state is in the transposition table
        board_str = self.board.board_state.decode("utf-8")
        key = self.tt.contains(board_str, colour, phase=self.board.phase)
        if key is not None:
            board_str = key[0]
            entry = self.tt.get_entry(board_str, colour)
            tt_value = entry[0]
            tt_type = entry[1]
            tt_best_move = entry[2]
            tt_depth = entry[3]

            # if we have found an entry in the transposition table, the move
            # we should try first is its stored best move
            move_to_try = tt_best_move

            if tt_depth >= depth:
                if tt_type == constant.TT_EXACT:
                    # PV node: return the stored exact value and best move
                    return tt_value, tt_best_move
                elif tt_type == constant.TT_LOWER:
                    # the stored value is a lower bound on the search
                    if tt_value > alpha:
                        alpha = tt_value
                elif tt_type == constant.TT_UPPER:
                    # the stored value is an upper bound on the search
                    if tt_value < beta:
                        beta = tt_value

                if alpha >= beta:
                    return tt_value, tt_best_move

        # generate and order the legal actions
        actions = self.board.update_actions(colour)
        actions = self.board.sort_actions(actions, colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player, actions) * dic[colour]
            return val, None

        best_val = -inf
        best_action = None

        if move_to_try is not None and move_to_try in actions:
            # put the transposition-table move first so it is searched first
            actions = [move_to_try] + actions

        # only search the most favourable actions, to limit the branching factor
        if len(actions) <= 12:
            favourable = actions
        elif 12 < len(actions) < 20:
            favourable = actions[:12]
        else:
            favourable = actions[:len(actions) // 2]

        # negascout search begins here
        for i, action in enumerate(favourable):
            # skip the transposition-table move if it appears again later
            if action == move_to_try and i > 0:
                continue

            elim = self.board.update_board(action, colour)

            if i == 0:
                # the first action is the best candidate found so far, so we
                # search it with a full window
                score, _ = self.negascout(depth - 1, -beta, -alpha, opponent)
                score = -score
            else:
                # assume the first move really is the best: try to prove it
                # with a null-window search on the remaining moves; if a
                # search "fails high" (the score lands strictly between alpha
                # and beta) the assumption was wrong and we must re-search
                # with a wider window to obtain the true minimax value
                score, _ = self.negascout(depth - 1, -alpha - 1, -alpha, opponent)
                score = -score

                # it failed high -- do a full re-search to find the real value
                if alpha < score < beta:
                    score, _ = self.negascout(depth - 1, -beta, -score, opponent)
                    score = -score

            # keep the best value and action found so far
            if best_val < score:
                best_val = score
                best_action = action

            # raise alpha if we improved on it
            if alpha < score:
                alpha = score

            # undo the action so another move can be applied to the board
            self.undo_actions(action, colour, elim)

            # test for an alpha-beta cutoff
            if alpha >= beta:
                break

        # store the result in the transposition table
        if best_val <= original_alpha:
            # fail-low: this value is an upper bound
            tt_type = constant.TT_UPPER
        elif best_val >= beta:
            # fail-high: this value is a lower bound
            tt_type = constant.TT_LOWER
        else:
            # exact (PV) value
            tt_type = constant.TT_EXACT

        self.tt.add_entry(self.board.board_state, colour, best_val, tt_type,
                          best_action, depth)

        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True
        if self.is_terminal():
            return True
        return False

    '''
    Note: it still needs to be verified that this evaluates the intended node
    (rather than the root state) given the undo-move scheme; other than that
    the algorithm behaves as intended, and further optimisations are possible.
    '''
    def evaluate_state(self, board, colour, actions):
        return self.evaluation.evaluate(board, colour, actions)

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_actions(self, action, colour, elim):
        return self.board.undo_action(action, colour, elim)
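# The searchers in this file raise a TimeOut exception when a single move
# evaluation exceeds its millisecond time budget, but the exception itself is
# not defined here.  A minimal definition consistent with how it is used
# (an assumption about the project's actual error module) is:

class TimeOut(Exception):
    """Raised when an iterative-deepening iteration exceeds its time budget."""
    pass


# Usage sketch (assumes a populated Board instance named `board`):
#     searcher = Negascout(board, constant.WHITE_PIECE)
#     best_move = searcher.itr_negascout()   # returns None on a forfeit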
class Negamax(object):
    # Early Negamax iteration: the transposition-table code is present but
    # disabled, and a flat time budget is used per move.

    def __init__(self, board, colour):
        self.tt = TranspositionTable()

        # search on a deep copy of the board to avoid mutating the real game
        self.board = deepcopy(board)

        # alpha-beta bounds stored on the object instead of in the call chain
        self.alpha = -inf
        self.beta = inf

        # colours of the maximising and minimising players
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0

        self.evaluation = Evaluation("./XML", "/eval_weights")

    '''
    Iterative-deepening Negamax driver function.
    '''
    def itr_negamax(self):
        # clear the transposition table at the start of the moving phase so it
        # does not grow too large
        if self.board.phase == constant.MOVING_PHASE and self.board.move_counter == 0:
            self.tt.clear()

        MAX_ITER = 10

        # default policy
        available_actions = self.board.update_actions(self.board, self.player)
        if len(available_actions) == 0:
            return None

        # time allocated per move, in milliseconds
        # (an earlier scheme divided a 30 s budget across the expected number
        # of remaining moves in each phase; a flat budget is used instead)
        self.time_alloc = 5000

        start_time = Negamax.curr_millisecond_time()
        best_depth = 1
        val, move = 0, None

        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            print(self.tt.size)
            print(depth)
            try:
                self.time_rem = self.time_alloc
                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()
                self.time_rem = self.time_alloc - (self.time_end - self.time_start)
                print(move)
                best_depth += 1
            except TimeOut:
                print("TIMEOUT")
                break

            if Negamax.curr_millisecond_time() - start_time > self.time_alloc:
                break

        self.eval_depth = best_depth
        return move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    def negamax(self, depth, alpha, beta, colour):
        # timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        original_alpha = alpha
        dic = {self.player: 1, self.opponent: -1}

        # (the transposition-table lookup -- probing for a stored value, bound
        # type, best move and depth, and using that best move for ordering --
        # is disabled in this version)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player)  # * dic[colour]
            return val, None

        best_val = -inf
        best_action = None

        actions = self.board.update_actions(self.board, colour)

        # get the favourable moves of the board
        # (get_favourable_actions is assumed to be provided elsewhere)
        actions = self.get_favourable_actions(self.available_actions)

        # if there are no favourable actions to iterate on, raise
        if len(actions) == 0:
            raise ReturnUnfavourableMove

        for action in actions:
            self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            score = -score

            if score > best_val:
                best_val = score
                best_action = action

            if score > alpha:
                alpha = score

            self.undo_move()

            if alpha >= beta:
                break

        # (the transposition-table store -- classifying best_val as an exact
        # value, lower bound or upper bound and saving it -- is also disabled)

        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True
        if self.is_terminal():
            return True
        return False

    '''
    Note: it still needs to be verified that this evaluates the intended node
    (rather than the root state) given the undo-move scheme.
    '''
    def evaluate_state(self, board, colour):
        return self.evaluation.evaluate(board, self.player)

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_move(self):
        return self.board.undo_move()
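# The variant above raises ReturnUnfavourableMove when the greedy move filter
# leaves nothing to search; that exception is not defined in this file.  A
# minimal definition consistent with its use (an assumption about the real
# error module) might look like:

class ReturnUnfavourableMove(Exception):
    """Raised when no favourable action remains for the greedy search."""
    pass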
class Negamax(object):
    # Greedy, depth-limited Negamax without transposition-table probing:
    # actions are sorted by a lightweight evaluation and only the most
    # favourable ones are searched, to limit the branching factor.

    def __init__(self, board, colour):
        self.tt = TranspositionTable()

        # search on a deep copy of the board to avoid mutating the real game
        self.board = deepcopy(board)

        # alpha-beta bounds stored on the object instead of in the call chain
        self.alpha = -inf
        self.beta = inf

        # colours of the maximising and minimising players
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0

        self.evaluation = Evaluation("./XML", "/eval_weights")

    '''
    Iterative-deepening Negamax driver function.
    '''
    def itr_negamax(self):
        # clear the transposition table on every move so it does not grow too big
        self.tt.clear()

        MAX_ITER = 10

        # default policy
        available_actions = self.board.update_actions(self.board, self.player)
        print(len(available_actions))
        action_set = set(available_actions)

        if len(available_actions) == 0:
            return None

        # time allocated per move, in milliseconds
        # (an earlier scheme divided a 120 s budget across the expected number
        # of remaining moves in each phase; flat budgets are used instead)
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = 1000
        else:
            self.time_alloc = 1000

        start_time = Negamax.curr_millisecond_time()
        best_depth = 1
        val, move = 0, None
        best_move = None

        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            print(self.tt.size)
            print(depth)
            try:
                self.time_rem = self.time_alloc
                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()
                self.time_rem = self.time_alloc - (self.time_end - self.time_start)
                print(move)
                best_depth += 1

                # always keep the latest move that is not None and is legal at
                # the root: because the search is greedy it sometimes returns
                # an illegal move (the cause is not yet known), so legality is
                # checked here as well
                if move is not None and move in action_set:
                    best_move = move
            except TimeOut:
                print("TIMEOUT")
                break

            if Negamax.curr_millisecond_time() - start_time > self.time_alloc:
                break

        self.eval_depth = best_depth
        return best_move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    # naive Negamax (depth limited) -- no transposition table
    def negamax(self, depth, alpha, beta, colour):
        # timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        dic = {self.player: 1, self.opponent: -1}

        # generate and order the legal actions
        actions_1 = self.board.update_actions(self.board, colour)
        actions = self.board.sort_actions(actions_1, colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player, actions) * dic[colour]
            return val, None

        best_val = -inf
        best_action = None

        # split the actions into favourable and unfavourable: if there are
        # more than 8 actions, only the first 8 'favourable' actions are
        # examined; otherwise all actions are examined.  This is a greedy
        # approach to minimax that limits the branching factor of the game.
        if len(actions) > 8:
            favourable = actions[:8]
        else:
            favourable = actions

        # depth reduction intended for unfavourable moves (note: since only
        # favourable moves are iterated below, the reduced-depth branch is
        # never taken as written)
        R = 2

        for action in favourable:
            self.board.update_board(action, colour)

            if action in favourable:
                score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            else:
                score, temp = self.negamax(depth - 1 - R, -beta, -alpha, opponent)
            score = -score

            if score > best_val:
                best_val = score
                best_action = action

            if score > alpha:
                alpha = score

            self.undo_move()

            if alpha >= beta:
                break

        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True
        if self.is_terminal():
            return True
        return False

    '''
    Note: it still needs to be verified that this evaluates the intended node
    (rather than the root state) given the undo-move scheme.
    '''
    def evaluate_state(self, board, colour, actions):
        return self.evaluation.evaluate(board, colour, actions)

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_move(self):
        return self.board.undo_move()
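# A rough sketch of how a player agent might drive this greedy searcher each
# turn: refresh the searcher's board copy, then ask for a move within the
# per-move time budget.  The GreedyAgent class and its attributes here are
# hypothetical and only illustrate the calling pattern.

class GreedyAgent(object):
    def __init__(self, board, colour):
        self.searcher = Negamax(board, colour)

    def choose_move(self, board):
        # copy the latest game state into the searcher, then run
        # iterative-deepening negamax; None signals a forfeit
        self.searcher.update_board(board)
        return self.searcher.itr_negamax()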
class Negamax(object):
    # Final Negamax iteration: transposition table, move ordering and greedy
    # pruning of unfavourable moves, with colour-specific evaluation weights.

    def __init__(self, board, colour, file_name):
        # transposition table for this searcher
        self.tt = TranspositionTable()

        # search on a deep copy of the board to avoid mutating the real game
        self.board = deepcopy(board)

        # alpha-beta bounds stored on the object instead of in the call chain
        self.alpha = -inf
        self.beta = inf

        # colours of the maximising and minimising players
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # dictionary storing the available moves of the board
        self.available_actions = {
            constant.WHITE_PIECE: {},
            constant.BLACK_PIECE: {}
        }

        # timing attributes
        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0
        self.total_time = 0

        # load the evaluation function based on the colour of the player
        if self.player == constant.WHITE_PIECE:
            self.evaluation = Evaluation("./XML", "/white_weights")
        else:
            self.evaluation = Evaluation("./XML", "/black_weights")

    '''
    Iterative-deepening Negamax.

    This implements a time cutoff: search is terminated once the time
    allocated for this evaluation has been used, and the best move found
    within that budget is returned.
    '''
    def itr_negamax(self):
        if self.board.phase == constant.PLACEMENT_PHASE:
            # clear the transposition table every time we evaluate a move in
            # the placement phase -- this limits how large the table can grow
            self.tt.clear()
            # set the maximum iterative-deepening depth based on the phase
            MAX_ITER = 5
        else:
            MAX_ITER = 11

        # update the root piece counts every time we search from a new node
        self.board.root_num_black = len(self.board.black_pieces)
        self.board.root_num_white = len(self.board.white_pieces)

        # default policy
        available_actions = self.board.update_actions(self.player)

        # if there are no available actions, return None -- this is a forfeit
        if len(available_actions) == 0:
            return None

        # time allocated per move, in milliseconds
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = 1500
        else:
            self.time_alloc = 1200

        # if we are running low on total time, or the game is dragging on,
        # spend less time per move
        if self.total_time > 90000 or self.board.move_counter > 120:
            self.time_alloc = 500

        # near the final shrinking phase we can decrease the time further
        if self.board.move_counter > 150:
            self.time_alloc = 190

        best_depth = 1
        val, move = 0, None

        # set the time remaining for this move evaluation
        self.time_rem = self.time_alloc

        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            # search deeper until the cutoff is reached
            try:
                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()

                # update the time remaining
                self.time_rem = self.time_alloc - (self.time_end - self.time_start)
                best_depth += 1
            except TimeOut:
                break

        # add the allocated time to the total time used so far
        self.total_time += self.time_alloc

        print(best_depth - 1)
        self.eval_depth = best_depth - 1
        return move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    # get the current time in milliseconds
    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    '''
    Negamax driver function.  This implements:
      - negamax with a transposition table;
      - move ordering using the best move found so far, if one exists in the
        transposition table;
      - move ordering of the moves thought to be favourable, using a
        lightweight evaluation function;
      - selecting only the top favourable moves to evaluate with negamax --
        heavy greedy pruning, designed so that we only look at moves we
        expect to produce a good outcome, pruning moves that have a high
        chance of having no effect on the game-state utility.
    '''
    def negamax(self, depth, alpha, beta, colour):
        # timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)
        original_alpha = alpha
        dic = {self.player: 1, self.opponent: -1}
        move_to_try = None

        # check if the current board state is in the transposition table
        board_str = self.board.board_state.decode("utf-8")
        key = self.tt.contains(board_str, colour, phase=self.board.phase)
        if key is not None:
            # get the value mappings from the table entry
            board_str = key[0]
            entry = self.tt.get_entry(board_str, colour)
            tt_value = entry[0]
            tt_type = entry[1]
            tt_best_move = entry[2]
            tt_depth = entry[3]

            # if we have found an entry in the transposition table, the move
            # we should try first is its stored best move
            move_to_try = tt_best_move

            if tt_depth >= depth:
                if tt_type == constant.TT_EXACT:
                    # PV node: this is the best move found so far
                    return tt_value, tt_best_move
                elif tt_type == constant.TT_LOWER:
                    # the stored minimax value is a lower bound on the search
                    if tt_value > alpha:
                        alpha = tt_value
                elif tt_type == constant.TT_UPPER:
                    # the stored value comes from a beta cutoff, so it is an
                    # upper bound
                    if tt_value < beta:
                        beta = tt_value

                # test for a cutoff -- return the best move found so far
                if alpha >= beta:
                    return tt_value, tt_best_move

        # obtain the actions and sort them
        actions = self.board.update_actions(colour)
        actions = self.board.sort_actions(actions, colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player, actions) * dic[colour]
            return val, None

        # do the negamax search
        best_val = -inf
        best_action = None

        # if the transposition table suggested a best action, search it first
        # by putting it at the front of the action list
        if move_to_try is not None and move_to_try in actions:
            actions = [move_to_try] + actions

        i = 0

        # split the actions into favourable and unfavourable actions; we only
        # restrict the search to the favourable ones when the list is long
        if len(actions) <= 12:
            favourable = actions
        elif 12 < len(actions) < 20:
            favourable = actions[:12]
        else:
            favourable = actions[:len(actions) // 2]

        # iterate only through the favourable moves: the aim is to reduce the
        # branching factor as much as possible while still evaluating enough
        # moves to retain some of negamax's optimal decision making, rather
        # than falling back to a purely greedy policy.
        for action in favourable:
            # skip the transposition-table move if we reach it again -- it has
            # already been searched
            if action == move_to_try and i != 0:
                continue
            i += 1

            # apply the action to the board, recording any eliminated pieces
            elim = self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            score = -score

            # undo the action applied to the board
            self.undo_action(action, colour, elim)

            # keep the best score and action found so far
            if score > best_val:
                best_val = score
                best_action = action

            # update alpha if needed
            if best_val > alpha:
                alpha = best_val

            # test for a cutoff
            if alpha >= beta:
                break

        # store the result in the transposition table
        if best_val <= original_alpha:
            # fail-low: this value is an upper bound
            tt_type = constant.TT_UPPER
        elif best_val >= beta:
            # fail-high: this value is a lower bound
            tt_type = constant.TT_LOWER
        else:
            # exact (PV) value
            tt_type = constant.TT_EXACT

        self.tt.add_entry(self.board.board_state, colour, best_val, tt_type,
                          best_action, depth)

        return best_val, best_action

    # cutoff test -- either depth is zero or the board is in a terminal state
    def cutoff_test(self, depth):
        if depth == 0:
            return True
        if self.is_terminal():
            return True
        return False

    # evaluate the game state
    def evaluate_state(self, board, colour, actions):
        return self.evaluation.evaluate(board, colour, actions)

    # update the negamax board representation for another search
    def update_board(self, board):
        self.board = deepcopy(board)

    # terminal state check
    def is_terminal(self):
        return self.board.is_terminal()

    # undo-board wrapper
    def undo_action(self, action, colour, elim):
        return self.board.undo_action(action, colour, elim)
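# The searchers above assume a TranspositionTable with clear(), contains(),
# get_entry(), add_entry() and a size attribute; the real class is not shown
# in this file.  The dictionary-backed sketch below only illustrates that
# interface as it is used here (the actual table may key entries differently,
# e.g. by phase or by a Zobrist hash), and is named distinctly to avoid being
# mistaken for the project's implementation.

class SimpleTranspositionTable(object):
    def __init__(self):
        self.table = {}
        self.size = 0

    def clear(self):
        self.table.clear()
        self.size = 0

    def contains(self, board_str, colour, phase=None):
        # return the key tuple if the position is stored, else None
        key = (board_str, colour)
        return key if key in self.table else None

    def get_entry(self, board_str, colour):
        # entry layout used by the searchers: (value, bound_type, best_move, depth)
        return self.table[(board_str, colour)]

    def add_entry(self, board_state, colour, value, bound_type, best_move, depth):
        # board_state arrives as bytes from board.board_state
        board_str = board_state.decode("utf-8") if isinstance(board_state, bytes) else board_state
        key = (board_str, colour)
        if key not in self.table:
            self.size += 1
        self.table[key] = (value, bound_type, best_move, depth)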
class Negamax(object):
    # Naive depth-limited Negamax: full-width search with alpha-beta pruning
    # and no transposition-table probing inside the search itself.

    def __init__(self, board, colour):
        self.tt = TranspositionTable()

        # search on a deep copy of the board to avoid mutating the real game
        self.board = deepcopy(board)

        # alpha-beta bounds stored on the object instead of in the call chain
        self.alpha = -inf
        self.beta = inf

        # colours of the maximising and minimising players
        self.player = colour
        self.opponent = Board.get_opp_piece_type(self.player)

        # default depth
        self.depth = inf

        # default move ordering with iterative deepening
        self.actions_evaluated = []
        self.actions_leftover = []

        # data structures for machine learning
        self.eval_depth = 0
        self.minimax_val = 0
        self.policy_vector = []

        # generate the actions for the start of the game
        # self.generate_actions()

        self.undo_effected = []
        self.time_alloc = 0
        self.time_rem = 0
        self.time_start = 0
        self.time_end = 0

        self.evaluation = Evaluation("./XML", "/eval_weights")

    '''
    Iterative-deepening Negamax driver function.
    '''
    def itr_negamax(self):
        # clear the transposition table on every move so it does not grow too big
        self.tt.clear()

        # maximum iterative-deepening depth (assumed value, matching the other
        # variants; it is not set in this version as written)
        MAX_ITER = 10

        # update the root piece counts every time we search from a new node
        self.board.root_num_black = len(self.board.black_pieces)
        self.board.root_num_white = len(self.board.white_pieces)

        # default policy
        available_actions = self.board.update_actions(self.player)
        action_set = set(available_actions)

        if len(available_actions) == 0:
            return None

        # time allocated per move, in milliseconds
        # (an earlier scheme divided a 120 s budget across the expected number
        # of remaining moves in each phase; flat budgets are used instead)
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.time_alloc = 3000
        else:
            self.time_alloc = 800

        start_time = Negamax.curr_millisecond_time()
        best_depth = 1
        val, move = 0, None
        best_move = None

        # iterative deepening begins here
        for depth in range(1, MAX_ITER):
            print(depth)
            try:
                self.time_rem = self.time_alloc
                self.time_start = self.curr_millisecond_time()
                val, move = self.negamax(depth, -inf, inf, self.player)
                self.time_end = self.curr_millisecond_time()
                self.time_rem = self.time_alloc - (self.time_end - self.time_start)
                print(move)
                best_depth += 1

                # always keep the latest move that is not None and is legal at
                # the root: because the search is greedy it sometimes returns
                # an illegal move (the cause is not yet known), so legality is
                # checked here as well
                if move is not None and move in action_set:
                    best_move = move
            except TimeOut:
                break

        self.eval_depth = best_depth
        return best_move

    def set_player_colour(self, colour):
        self.player = colour
        self.opponent = Board.get_opp_piece_type(colour)

    @staticmethod
    def curr_millisecond_time():
        return int(time() * 1000)

    # naive Negamax (depth limited) -- no transposition table
    def negamax(self, depth, alpha, beta, colour):
        # timeout handling
        self.time_end = self.curr_millisecond_time()
        if self.time_end - self.time_start > self.time_rem:
            raise TimeOut

        opponent = Board.get_opp_piece_type(colour)

        # sign convention for the evaluation
        dic = {self.player: 1, self.opponent: -1}

        # generate legal actions
        actions = self.board.update_actions(colour)

        # terminal test -- default case
        if self.cutoff_test(depth):
            val = self.evaluate_state(self.board, self.player, actions) * dic[colour]
            return val, None

        # do the negamax search
        best_val = -inf
        best_action = None

        for action in actions:
            # apply the action, recurse, then restore the board
            elim = self.board.update_board(action, colour)
            score, temp = self.negamax(depth - 1, -beta, -alpha, opponent)
            self.undo_action(action, colour, elim)
            score = -score

            if score > best_val:
                best_val = score
                best_action = action

            if score > alpha:
                alpha = score

            if alpha >= beta:
                break

        return best_val, best_action

    def cutoff_test(self, depth):
        if depth == 0:
            return True
        if self.is_terminal():
            return True
        return False

    def evaluate_state(self, board, colour, actions):
        # return len(self.board.white_pieces) - len(self.board.black_pieces)
        return self.evaluation.evaluate(board, colour, actions)

    def update_board(self, board):
        self.board = deepcopy(board)

    def is_terminal(self):
        return self.board.is_terminal()

    def undo_action(self, action, colour, elim_pieces):
        self.board.undo_action(action, colour, elim_pieces)
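# All of the searchers above load their weights through an Evaluation object
# constructed as Evaluation(path, weights_file) and queried with
# evaluate(board, colour, actions) (one early variant omits the actions
# argument).  The stub below only illustrates that interface with a trivial
# material count; the project's real evaluation is a weighted feature function
# loaded from the XML weight files and is not reproduced here.

class EvaluationStub(object):
    def __init__(self, path, filename):
        # the real class parses weights from path + filename; the stub ignores them
        self.path = path
        self.filename = filename

    def evaluate(self, board, colour, actions=None):
        # simple material difference from the point of view of `colour`
        material = len(board.white_pieces) - len(board.black_pieces)
        return material if colour == constant.WHITE_PIECE else -material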