def undo_available_placement(self):
    """Drain the undo log and rebuild the placement-phase action tables.

    Records are popped from the end of ``self.undo_effected`` until it is
    empty.  Each record is read by index as ``(location, colour, undo_type)``:

    * ``constant.ELIMINATED_PIECE`` -- the location is removed from the
      available actions of the record's colour and of the opposing colour.
    * ``constant.PLACE_LOC`` -- when neither colour currently lists the
      location, it is added (with value ``constant.PLACEMENT_PHASE``) for
      each colour whose starting area contains it.

    Records carrying any other undo type are simply discarded.
    """
    while self.undo_effected:
        record = self.undo_effected.pop()
        square = record[0]
        piece_colour = record[1]
        kind = record[2]
        sides = (piece_colour, Board.get_opp_piece_type(piece_colour))

        if kind == constant.ELIMINATED_PIECE:
            # the undo put this piece back on the board, so the square can
            # no longer be offered to either side
            for side in sides:
                self.available_actions[side].pop(square, None)
        elif kind == constant.PLACE_LOC:
            # the undo lifted a placed piece; offer the square again unless
            # one of the tables already lists it
            already_listed = any(
                square in self.available_actions[side] for side in sides
            )
            if not already_listed:
                for side in sides:
                    if self.board.within_starting_area(square, side):
                        self.available_actions[side][square] = \
                            constant.PLACEMENT_PHASE
class Player:
    """Agent that chooses its moves with Monte Carlo tree search.

    The player owns a private ``Board``.  ``update`` mirrors the opponent's
    move onto it; ``action`` selects, applies and returns this player's move.
    """

    def __init__(self, colour):
        """Create a player.

        Args:
            colour: ``'white'`` or ``'black'``.

        Raises:
            ValueError: if ``colour`` is any other value.
        """
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE
        else:
            # Without this branch self.colour stayed unset and construction
            # failed a few lines later with an unhelpful AttributeError.
            raise ValueError(
                "colour must be 'white' or 'black', got %r" % (colour,))

        self.available_moves = []

        # each player's internal board representation
        self.board = Board()

        # TODO -- need to see if this works correctly
        self.strategy = MonteCarloTreeSearch(self.board, self.colour)

        self.opponent = self.board.get_opp_piece_type(self.colour)

    def update(self, action):
        """Apply the opponent's ``action`` to the internal board.

        In the placement phase ``action`` is handed to ``update_board``
        unchanged.  In the moving phase ``action`` is read as
        ``(action[0], action[1])``, converted with
        ``convert_coord_to_move_type`` and applied as
        ``(action[0], move_type)``.  A moving-phase action whose first
        element is not a tuple is reported on stdout and ignored.
        """
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(action, self.opponent)
        elif self.board.phase == constant.MOVING_PHASE:
            if not isinstance(action[0], tuple):
                print("ERROR: action is not a tuple")
                return

            move_type = self.board.convert_coord_to_move_type(
                action[0], action[1])

            # update the player board representation with the action
            self.board.update_board((action[0], move_type), self.opponent)

    def action(self, turns):
        """Search for this player's move, apply it and return it.

        Args:
            turns: turn counter supplied by the caller; when it is 0 during
                the moving phase the board's move counter is reset.

        Returns:
            In the placement phase, the move returned by the search.  In the
            moving phase, ``(best_move[0], new_pos)`` where ``new_pos`` comes
            from ``Board.convert_move_type_to_coord``; or ``None`` when the
            search produced no move.
        """
        self.strategy.num_nodes = 0
        self.strategy.update_board(self.board)

        if turns == 0 and self.board.phase == constant.MOVING_PHASE:
            self.board.move_counter = 0
            self.board.phase = constant.MOVING_PHASE

        best_move = self.strategy.MCTS()

        # once we have found the best move we must apply it to the board
        # representation
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(best_move, self.colour)
            return best_move

        if best_move is None:
            # No move to make: indexing None below would raise TypeError.
            return None

        new_pos = Board.convert_move_type_to_coord(best_move[0], best_move[1])
        self.board.update_board(best_move, self.colour)
        return best_move[0], new_pos
def __init__(self, board, colour):
    """Initialise the search state for ``colour`` on a deep copy of ``board``."""
    # transposition table used by the search
    self.tt = TranspositionTable()

    # the search works on its own deep copy of the board
    self.board = deepcopy(board)

    # alpha-beta window kept on the instance instead of being threaded
    # through every call
    self.alpha, self.beta = -inf, inf

    # colours of the maximising and minimising sides
    self.player = colour
    self.opponent = Board.get_opp_piece_type(self.player)

    # default depth
    self.depth = inf

    # move-ordering buffers for iterative deepening
    self.actions_evaluated, self.actions_leftover = [], []

    # data structures for machine learning
    self.eval_depth = self.minimax_val = 0
    self.policy_vector = []

    # available moves of the board, one table per colour
    self.available_actions = {
        side: {} for side in (constant.WHITE_PIECE, constant.BLACK_PIECE)
    }

    # actions are not generated at construction time
    # self.generate_actions()

    self.undo_effected = []

    # time bookkeeping, all starting at zero
    self.time_alloc = self.time_rem = self.time_start = self.time_end = 0

    self.evaluation = Evaluation("./XML", "/eval_weights")
from Agents.Minimax import Minimax


def _show_child(shown):
    """Print a node's board, its number of available moves and its colour."""
    shown.board.print_board()
    print(len(shown.available_moves))
    print(shown.colour)


# start a new game board and display it
board_game = Board()
board_game.print_board()

# starting node -- this node is black
node = Minimax.create_node(board_game, constant.BLACK_PIECE, None)
node.board.print_board()
print(node.available_moves)
print(node.colour)
print()

# child node: built from the starting node's board with the opposite colour
# and the starting node's available move at index 2
child = Minimax.create_node(
    node.board,
    Board.get_opp_piece_type(node.colour),
    node.available_moves[2],
)
_show_child(child)
print()

# undo the last move on the child's board and print the resulting state
print("UNDO MOVE")
child.board.undo_move()
child.board.print_board()
print(child.board.eliminated_pieces)
print(child.board.piece_pos)

# build the next child from the child's available move at index 3
child = Minimax.create_node(
    child.board,
    Board.get_opp_piece_type(child.colour),
    child.available_moves[3],
)
_show_child(child)
def negamax(self, depth, alpha, beta, colour):
    """Alpha-beta negamax over the favourable actions of the current board.

    Args:
        depth: remaining search depth, passed to ``self.cutoff_test``.
        alpha: lower bound of the search window for the side to move.
        beta: upper bound of the search window for the side to move.
        colour: colour of the side to move at this node.

    Returns:
        ``(best_val, best_action)``.  At a cutoff node the value is the
        result of ``self.evaluate_state`` and the action is ``None``.

    Raises:
        TimeOut: when the elapsed time exceeds ``self.time_rem``.
        ReturnUnfavourableMove: when there are no favourable actions to
            search at this node.
    """
    # Timeout handling
    self.time_end = self.curr_millisecond_time()
    if self.time_end - self.time_start > self.time_rem:
        raise TimeOut

    opponent = Board.get_opp_piece_type(colour)

    # The transposition-table probe, move ordering and store are disabled in
    # this version of the search.

    # terminal test -- default case
    if self.cutoff_test(depth):
        # NOTE(review): the evaluation is taken for self.player and is not
        # sign-adjusted for the side to move (the adjustment was commented
        # out in the original) -- confirm this is intended for negamax.
        val = self.evaluate_state(self.board, self.player)
        return val, None

    # do the minimax search
    best_val = -inf
    best_action = None

    # The return value is not used here; the call is kept in case
    # update_actions has side effects the search relies on -- TODO confirm.
    self.board.update_actions(self.board, colour)

    # get the favourable moves of the board
    actions = self.get_favourable_actions(self.available_actions)

    # If there are no favourable actions to iterate on, raise.  The original
    # test was `len(actions) < 0`, which can never be true, so the exception
    # was unreachable.
    # NOTE(review): confirm the caller handles ReturnUnfavourableMove.
    if len(actions) == 0:
        raise ReturnUnfavourableMove

    for action in actions:
        self.board.update_board(action, colour)

        # the child's value is from the opponent's point of view: negate it
        score, _ = self.negamax(depth - 1, -beta, -alpha, opponent)
        score = -score

        if score > best_val:
            best_val = score
            best_action = action

        if score > alpha:
            alpha = score

        self.undo_move()

        # beta cutoff: the opponent already has a better alternative
        if alpha >= beta:
            break

    return best_val, best_action
def set_player_colour(self, colour):
    """Store ``colour`` as the player's colour and derive the opposing colour."""
    self.player = colour
    # the opponent is whatever Board reports as the opposite piece type
    self.opponent = Board.get_opp_piece_type(self.player)
class Player:
    """Agent that chooses its moves with a depth-3 alpha-beta minimax search.

    The player owns a private ``Board`` and a ``MinimaxABUndo`` searcher.
    ``update`` mirrors the opponent's move onto the board; ``action``
    searches for, applies and returns this player's move.
    """

    def __init__(self, colour):
        """Create a player.

        Args:
            colour: ``'white'`` or ``'black'``.

        Raises:
            ValueError: if ``colour`` is any other value.
        """
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE
        else:
            # Without this branch self.colour stayed unset and construction
            # failed in init_start_moves with an unhelpful AttributeError.
            raise ValueError(
                "colour must be 'white' or 'black', got %r" % (colour,))

        self.available_moves = []

        # each player's internal board representation
        self.board = Board()

        # initialise the available moves
        self.init_start_moves()

        # TODO -- need to see if this works correctly
        self.minimax = MinimaxABUndo(self.board)

        self.opponent = self.board.get_opp_piece_type(self.colour)

    def init_start_moves(self):
        """Fill ``self.available_moves`` with this colour's starting zone.

        White gets rows ``0 .. BOARD_SIZE - 3``, black rows
        ``2 .. BOARD_SIZE - 1``; squares listed in ``board.corner_pos`` are
        left out.  Moves are stored as ``(col, row)``.
        """
        if self.colour == constant.WHITE_PIECE:
            rows = range(0, constant.BOARD_SIZE - 2)
        else:
            rows = range(2, constant.BOARD_SIZE)

        for row in rows:
            for col in range(constant.BOARD_SIZE):
                # NOTE(review): the corner test uses (row, col) while moves
                # are stored as (col, row); this only matters if corner_pos
                # is not symmetric -- confirm.
                if (row, col) not in self.board.corner_pos:
                    self.available_moves.append((col, row))

    def update(self, action):
        """Apply the opponent's ``action`` to the internal board.

        If no move has been made yet (``move_counter == 0``) the opponent is
        recorded as the first player to move.  Placement-phase actions are
        applied unchanged and the searcher's board is refreshed.
        Moving-phase actions are read as ``(action[0], action[1])`` and
        applied as ``(action[0], move_type)``; an action whose first element
        is not a tuple is reported on stdout and ignored.
        """
        if self.board.move_counter == 0:
            # then the opponent is the first person to move
            self.board.set_player_to_move(self.opponent)

        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(action, self.opponent)
            self.minimax.update_board(self.board)
        elif self.board.phase == constant.MOVING_PHASE:
            if not isinstance(action[0], tuple):
                # replaces the placeholder debug text "asdfasf" with the
                # message the sibling players print for the same condition
                print("ERROR: action is not a tuple")
                return

            move_type = self.board.convert_coord_to_move_type(
                action[0], action[1])
            self.board.update_board((action[0], move_type), self.opponent)

    def action(self, turns):
        """Search for this player's move, apply it and return it.

        Args:
            turns: turn counter supplied by the caller.  At 0 in the
                placement phase this player is recorded as first to move;
                at 24 in the placement phase the board is switched to the
                moving phase and its move counter reset.

        Returns:
            In the placement phase, the move returned by the search.  In the
            moving phase, ``(best_move[0], new_pos)``; or ``None`` when the
            search produced no move.
        """
        if turns == 0 and self.board.phase == constant.PLACEMENT_PHASE:
            self.board.set_player_to_move(self.colour)

        if turns == 24 and self.board.phase == constant.PLACEMENT_PHASE:
            self.board.move_counter = 0
            self.board.phase = constant.MOVING_PHASE

        root = self.minimax.create_node(self.colour, None)
        self.minimax.update_minimax_board(None, root)

        # do an alpha beta search on this node
        best_move = self.minimax.alpha_beta_minimax(3, root)

        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return best_move

        if best_move is None:
            # No move to make: indexing None below would raise TypeError.
            return None

        new_pos = Board.convert_move_type_to_coord(best_move[0], best_move[1])
        self.board.update_board(best_move, self.colour)
        self.minimax.update_board(self.board)
        return best_move[0], new_pos
def negascout(self, depth, colour):
    """Root NegaScout search: pick the best action for ``colour``.

    The first candidate is searched with the full ``(alpha, beta)`` window.
    Every later candidate is first probed with a null window
    ``(alpha, alpha + 1)`` and only re-searched with the full window when
    the probe lands strictly inside ``(alpha, beta)``.  At most the first
    20 ordered actions are considered.

    Args:
        depth: remaining search depth.
        colour: colour of the side to move.

    Returns:
        The best action found (or the transposition-table move when a
        sufficiently deep entry settles the node).
        NOTE(review): at a cutoff node this returns the tuple
        ``(val, None)`` instead of an action, as the original did --
        confirm what callers expect on that path.

    Raises:
        TimeOut: when the elapsed time exceeds ``self.time_rem``.
    """
    # Timeout handling
    self.time_end = self.curr_millisecond_time()
    if self.time_end - self.time_start > self.time_rem:
        raise TimeOut

    opponent = Board.get_opp_piece_type(colour)
    alpha = -inf
    beta = inf
    original_alpha = alpha
    # sign applied to the evaluation depending on the side to move
    dic = {self.player: 1, self.opponent: -1}

    move_to_try = None

    # check if the current board state is in the transposition table
    board_str = self.board.board_state.decode("utf-8")
    key = self.tt.contains(board_str, colour, phase=self.board.phase)
    if key is not None:
        board_str = key[0]
        entry = self.tt.get_entry(board_str, colour)
        tt_value = entry[0]
        tt_type = entry[1]
        tt_best_move = entry[2]
        tt_depth = entry[3]

        # the stored best move is the one we should try first
        move_to_try = tt_best_move

        if tt_depth >= depth:
            if tt_type == constant.TT_EXACT:
                return tt_best_move
            elif tt_type == constant.TT_LOWER:
                if tt_value > alpha:
                    alpha = tt_value
            elif tt_type == constant.TT_UPPER:
                if tt_value < beta:
                    beta = tt_value

            if alpha >= beta:
                return tt_best_move

    actions_1 = self.board.update_actions(self.board, colour)
    actions = self.board.sort_actions(actions_1, colour)

    # terminal test -- default case
    if self.cutoff_test(depth):
        val = self.evaluate_state(self.board, self.player, actions_1) \
            * dic[colour]
        return val, None

    # do the minimax search
    best_val = -inf
    best_action = None

    if move_to_try is not None and move_to_try in actions:
        # put the move to try first so it is searched first; its later
        # duplicate is skipped inside the loop
        actions = [move_to_try] + actions

    # Upper bound used for the next child's search.  It starts as the real
    # beta and then becomes the null-window bound alpha + 1.  The original
    # overwrote beta itself, which turned every null-window fail-high into a
    # (false) beta cutoff, made the re-search reuse the null window, and
    # classified the stored bound against the wrong beta.
    scout_beta = beta

    for i, action in enumerate(actions[:20]):
        # skip over the duplicate of the transposition-table move
        if action == move_to_try and i > 0:
            continue

        self.board.update_board(action, colour)

        score = -self.negascout_value(depth - 1, -scout_beta, -alpha, opponent)

        if alpha < score < beta and i > 0:
            # the null-window probe says this move beats the current best:
            # search it again with the full window for an exact score
            score = -self.negascout_value(depth - 1, -beta, -alpha, opponent)

        if best_val < score:
            best_val = score
            best_action = action

        if alpha < score:
            alpha = score

        self.undo_move()

        if alpha >= beta:
            break

        scout_beta = alpha + 1

    # classify the result for the transposition table
    if best_val <= original_alpha:
        # then this is an upperbound -FAILHARD
        tt_type = constant.TT_UPPER
    elif best_val >= beta:
        tt_type = constant.TT_LOWER
    else:
        tt_type = constant.TT_EXACT

    # add the entry to the transposition table
    self.tt.add_entry(self.board.board_state, colour, best_val, tt_type,
                      best_action, depth)

    return best_action
class Player:
    """Agent that chooses its moves with an iterative negamax search.

    The player owns a private ``Board`` and a ``Negamax`` searcher.
    ``update`` mirrors the opponent's move onto the board; ``action``
    searches for, applies and returns this player's move.  After each
    search the searcher's ``eval_depth`` and ``minimax_val`` are copied to
    ``depth_eval`` and ``minimax_val``.
    """

    def __init__(self, colour):
        """Create a player.

        Args:
            colour: ``'white'`` or ``'black'``.

        Raises:
            ValueError: if ``colour`` is any other value.
        """
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE
        else:
            # Without this branch self.colour stayed unset and construction
            # failed a few lines later with an unhelpful AttributeError.
            raise ValueError(
                "colour must be 'white' or 'black', got %r" % (colour,))

        self.available_moves = []

        # each player's internal board representation
        self.board = Board()

        # TODO -- need to see if this works correctly
        self.minimax = Negamax(self.board, self.colour)

        self.opponent = self.board.get_opp_piece_type(self.colour)

        # results copied from the searcher after every call to action()
        self.depth_eval = 0
        self.minimax_val = 0
        self.policy_vector = 0

    def update(self, action):
        """Apply the opponent's ``action`` to the internal board.

        Placement-phase actions are applied unchanged.  Moving-phase actions
        are read as ``(action[0], action[1])`` and applied as
        ``(action[0], move_type)``; an action whose first element is not a
        tuple is reported on stdout and ignored.  After a successful update
        the searcher's board is refreshed.
        """
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(action, self.opponent)
            self.minimax.update_board(self.board)
        elif self.board.phase == constant.MOVING_PHASE:
            if not isinstance(action[0], tuple):
                print("ERROR: action is not a tuple")
                return

            move_type = self.board.convert_coord_to_move_type(
                action[0], action[1])

            # update the player board representation with the action
            self.board.update_board((action[0], move_type), self.opponent)
            self.minimax.update_board(self.board)

    def action(self, turns):
        """Search for this player's move, apply it and return it.

        Args:
            turns: turn counter supplied by the caller; when it is 0 during
                the moving phase the board's move counter is reset.

        Returns:
            In the placement phase, the move returned by the search.  In the
            moving phase, ``(best_move[0], new_pos)``; or ``None`` when the
            search produced no move.
        """
        self.minimax.update_board(self.board)

        if turns == 0 and self.board.phase == constant.MOVING_PHASE:
            self.board.move_counter = 0
            self.board.phase = constant.MOVING_PHASE

        best_move = self.minimax.itr_negamax()

        self.depth_eval = self.minimax.eval_depth
        self.minimax_val = self.minimax.minimax_val

        # once we have found the best move we must apply it to the board
        # representation
        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(best_move, self.colour)
            self.minimax.update_board(self.board)
            return best_move

        if best_move is None:
            return None

        new_pos = Board.convert_move_type_to_coord(best_move[0], best_move[1])
        self.board.update_board(best_move, self.colour)
        self.minimax.update_board(self.board)
        return best_move[0], new_pos
node.board.print_board() print(node.available_moves) print(node.colour) print() # to create a child node we apply one of the move from black to the next node child = Minimax.create_node(node.board, node.colour, (3, 2)) child.board.print_board() print(child.available_moves) print(child.board.move_counter) print(child.board.piece_pos) print(child.board.eliminated_pieces) child = Minimax.create_node(child.board, Board.get_opp_piece_type(child.colour), (3, 3)) child.board.print_board() print(child.available_moves) print(child.board.move_counter) print(child.board.piece_pos) print(child.board.eliminated_pieces) child = Minimax.create_node(child.board, Board.get_opp_piece_type(child.colour), (6, 2)) child.board.print_board() print(child.available_moves) print(child.board.move_counter) print(child.board.piece_pos) print(child.board.eliminated_pieces) child = Minimax.create_node(child.board,
class Player:
    """Agent that plays a uniformly random legal-looking move.

    The player owns a private ``Board`` and a list ``available_moves``.
    During the placement phase the list holds ``(col, row)`` squares; during
    the moving phase it holds ``(piece, move_type)`` pairs rebuilt from the
    board on every turn.
    """

    def __init__(self, colour):
        """Create a player.

        Args:
            colour: ``'white'`` or ``'black'``.

        Raises:
            ValueError: if ``colour`` is any other value.
        """
        if colour == 'white':
            self.colour = constant.WHITE_PIECE
        elif colour == 'black':
            self.colour = constant.BLACK_PIECE
        else:
            # Without this branch self.colour stayed unset and construction
            # failed in init_start_moves with an unhelpful AttributeError.
            raise ValueError(
                "colour must be 'white' or 'black', got %r" % (colour,))

        self.available_moves = []

        # each player's internal board representation
        self.board = Board()

        # initialise the available moves
        self.init_start_moves()

        self.opponent = self.board.get_opp_piece_type(self.colour)

    def init_start_moves(self):
        """Fill ``self.available_moves`` with this colour's starting zone.

        White gets rows ``0 .. BOARD_SIZE - 3``, black rows
        ``2 .. BOARD_SIZE - 1``; squares listed in ``board.corner_pos`` are
        left out.  Moves are stored as ``(col, row)``.
        """
        if self.colour == constant.WHITE_PIECE:
            rows = range(0, constant.BOARD_SIZE - 2)
        else:
            rows = range(2, constant.BOARD_SIZE)

        for row in rows:
            for col in range(constant.BOARD_SIZE):
                # NOTE(review): the corner test uses (row, col) while moves
                # are stored as (col, row); this only matters if corner_pos
                # is not symmetric -- confirm.
                if (row, col) not in self.board.corner_pos:
                    self.available_moves.append((col, row))

    def update(self, action):
        """Apply the opponent's ``action`` to the internal board.

        If no move has been made yet (``move_counter == 0``) the opponent is
        recorded as the first player to move.  In the placement phase the
        action is applied and ``available_moves`` is adjusted; in the moving
        phase the action is read as ``(action[0], action[1])`` and applied
        as ``(action[0], move_type)``.  A moving-phase action whose first
        element is not a tuple is silently ignored.
        """
        if self.board.move_counter == 0:
            # then the opponent is the first person to move
            self.board.set_player_to_move(self.opponent)

        if self.board.phase == constant.PLACEMENT_PHASE:
            self.board.update_board(action, self.opponent)
            eliminated_pieces = self.board.eliminated_pieces[self.opponent]

            # NOTE(review): this appends a square that is ALREADY listed, so
            # it creates a duplicate rather than re-opening a freed square.
            # When the opponent's own placement is eliminated immediately,
            # the duplicate is cancelled by the removal below.  Whether a
            # `not in` test would be correct depends on whether
            # eliminated_pieces is cumulative -- confirm against Board
            # before changing.
            for piece in eliminated_pieces:
                if piece in self.available_moves and \
                        Player.within_starting_area(piece, self.colour):
                    self.available_moves.append(piece)

            # remove the opponent piece from the available moves list
            if action in self.available_moves:
                self.available_moves.remove(action)

        elif self.board.phase == constant.MOVING_PHASE:
            if not isinstance(action[0], tuple):
                return

            move_type = self.board.convert_coord_to_move_type(
                action[0], action[1])
            self.board.update_board((action[0], move_type), self.opponent)

    def action(self, turns):
        """Pick a random move, apply it to the board and return it.

        Args:
            turns: turn counter supplied by the caller.

        Returns:
            Placement phase with ``turns < 24``: the chosen ``(col, row)``
            square.  Moving phase: ``(piece, new_pos)``, or ``None`` when
            this player has no legal move.  In any other state the method
            returns ``None``.
        """
        if turns == 0 and self.board.phase == constant.PLACEMENT_PHASE:
            # then we are first person to move
            self.board.set_player_to_move(self.colour)

        if turns < 24 and self.board.phase == constant.PLACEMENT_PHASE:
            search_algorithm = Random(len(self.available_moves))
            next_move = self.available_moves[search_algorithm.choose_move()]

            # making moves during the placement phase
            self.board.update_board(next_move, self.colour)
            eliminated_pieces = self.board.eliminated_pieces[self.colour]

            # remove the move made from the available moves
            self.available_moves.remove(next_move)

            for piece in eliminated_pieces:
                if piece in self.available_moves:
                    self.available_moves.remove(piece)

            return next_move

        elif self.board.phase == constant.MOVING_PHASE:
            # rebuild the list in the form ((col, row), move_type)
            self.update_available_moves()

            # if there are no available moves to be made we return None
            if len(self.available_moves) == 0:
                return None

            # TODO : THIS IS WHERE WE CARRY OUT OUR SEARCH ALGORITHM
            search_algorithm = Random(len(self.available_moves))
            next_move = self.available_moves[search_algorithm.choose_move()]
            self.board.update_board(next_move, self.colour)

            new_pos = self.board.convert_move_type_to_coord(
                next_move[0], next_move[1])

            # TODO - need to double check if this update_available_moves is
            # necessary
            self.update_available_moves()

            return next_move[0], new_pos

    def update_available_moves(self):
        """Rebuild ``available_moves`` from the pieces currently on the board.

        Every piece in ``board.piece_pos[self.colour]`` is paired with every
        move type in ``range(constant.MAX_MOVETYPE)`` that
        ``board.is_legal_move`` accepts.
        """
        # clear first so the list is empty while legality is being checked,
        # as in the original
        self.available_moves = []
        self.available_moves = [
            (piece, move_type)
            for piece in self.board.piece_pos[self.colour]
            for move_type in range(constant.MAX_MOVETYPE)
            if self.board.is_legal_move(piece, move_type)
        ]

    @staticmethod
    def within_starting_area(move, colour):
        """Return whether ``move`` lies in the starting rows of ``colour``.

        The row ranges match ``init_start_moves``: rows
        ``0 .. BOARD_SIZE - 3`` for white and ``2 .. BOARD_SIZE - 1`` for
        black.  Only the row is checked.  An unknown colour yields ``False``.

        Args:
            move: a ``(col, row)`` pair.
            colour: ``constant.WHITE_PIECE`` or ``constant.BLACK_PIECE``.
        """
        # The original nested this dispatch inside
        # `if colour == WHITE_PIECE`, so the black branch was unreachable
        # and black always got None; its inclusive upper bounds (6 and 8)
        # were also one row too generous.
        if colour == constant.WHITE_PIECE:
            min_row, end_row = 0, constant.BOARD_SIZE - 2
        elif colour == constant.BLACK_PIECE:
            min_row, end_row = 2, constant.BOARD_SIZE
        else:
            return False

        col, row = move
        return min_row <= row < end_row
def negamax(self, depth, alpha, beta, colour):
    """Alpha-beta negamax restricted to the first 8 ordered actions.

    This is a greedy variant: after the legal actions are generated and
    sorted, only the first 8 are searched, which caps the branching factor.

    Args:
        depth: remaining search depth, passed to ``self.cutoff_test``.
        alpha: lower bound of the search window for the side to move.
        beta: upper bound of the search window for the side to move.
        colour: colour of the side to move at this node.

    Returns:
        ``(best_val, best_action)``.  At a cutoff node the value is the
        evaluation for ``self.player`` multiplied by +1 when ``colour`` is
        ``self.player`` and -1 when it is ``self.opponent``; the action is
        ``None``.

    Raises:
        TimeOut: when the elapsed time exceeds ``self.time_rem``.
    """
    # Timeout handling
    self.time_end = self.curr_millisecond_time()
    if self.time_end - self.time_start > self.time_rem:
        raise TimeOut

    opponent = Board.get_opp_piece_type(colour)

    # sign applied to the evaluation depending on the side to move
    dic = {self.player: 1, self.opponent: -1}

    # generate legal actions and order them
    actions_1 = self.board.update_actions(self.board, colour)
    actions = self.board.sort_actions(actions_1, colour)

    # terminal test -- default case
    if self.cutoff_test(depth):
        val = self.evaluate_state(self.board, self.player, actions) \
            * dic[colour]
        return val, None

    # do the minimax search
    best_val = -inf
    best_action = None

    # THIS IS A GREEDY APPROACH TO MINIMAX THAT LIMITS OUR BRANCHING FACTOR
    # OF THE GAME: only the first 8 ordered actions are searched.
    favourable = actions[:8]

    for action in favourable:
        self.board.update_board(action, colour)

        # The original tested `action in favourable` here and kept a
        # reduced-depth (depth - 1 - R, R = 2) branch for the other case.
        # Inside this loop the test is always true, so that branch could
        # never run; it has been removed along with the per-move scan.
        score, _ = self.negamax(depth - 1, -beta, -alpha, opponent)
        score = -score

        if score > best_val:
            best_val = score
            best_action = action

        if score > alpha:
            alpha = score

        self.undo_move()

        # beta cutoff: the opponent already has a better alternative
        if alpha >= beta:
            break

    return best_val, best_action