def get_heuristic_value(board: Board): """ Given a board, calculates and returns its rating based on heuristics. """ # Get a list of all squares with white pieces and a list of squares with # black pieces. white_squares: List[Square] = board.get_player_squares( PlayerColor.WHITE) black_squares: List[Square] = board.get_player_squares( PlayerColor.BLACK) # If there are any black pieces, calculate the sum of all white pieces' # manhattan displacement to the first black piece in the list. This # piece will remain consistent until it is dead. This fixes the issue of # white pieces, when separated from the black pieces, not being able to # find their way to the black pieces easily. manhattan_dist_sum: int = 0 if (len(black_squares) > 0): black_square: Square = black_squares[0] for white_square in white_squares: displacement: Pos2D = (black_square.pos - white_square.pos) manhattan_dist_sum += abs(displacement.x) + abs(displacement.y) # Calculate the number of white and black pieces. This is a very # important heuristic that will help prioritize preserving white's own # pieces and killing the enemy's black pieces. num_white_pieces: int = len(white_squares) num_black_pieces: int = len(black_squares) # Return the heuristic rating by using the appropriate weights. return round( IDSAgent._WHITE_WEIGHT * num_white_pieces - IDSAgent._BLACK_WEIGHT * num_black_pieces - IDSAgent._DIST_WEIGHT * manhattan_dist_sum, IDSAgent._RATING_NUM_ROUNDING)
class Game:
    def __init__(self):
        # Current player's step.
        self.curPlayer = 'X'
        self.isPause = False
        self.isDraw = False
        self.winner = None
        self.msg = ''
        self.board = Board()

    def draw(self, sc):
        sc.fill(BG)
        self.board.draw(sc)
        pygame.draw.rect(sc, BORDER_COLOR, (0, 0, W, H), 10)
        if self.msg:
            self.showMessage(sc, self.msg, True)
        elif self.isPause:
            self.showMessage(sc, 'Paused'.upper(), True)
        elif self.isDraw:
            self.showMessage(sc, 'Draw'.upper(), True)
        elif self.winner:
            self.showMessage(sc, 'Winner {}'.format(self.winner), True)

    @staticmethod
    def showMessage(sc, msg, fill=False):
        if fill:
            sc.fill(BG)
        font = pygame.font.SysFont('Arial', 40)
        fontSurf = font.render(msg, True, TEXT_COLOR)
        fontRect = fontSurf.get_rect(center=(W // 2, H // 2))
        sc.blit(fontSurf, fontRect)

    def pause(self):
        self.isPause = True

    def run(self):
        self.isPause = False

    def toggleStatus(self):
        if self.isPause:
            self.run()
        else:
            self.pause()

    def quit(self):
        pass

    def update(self, *args):
        pass
def get_best_delta(board: Board, player: PlayerColor, depth: int,
                   recent_board_history: List[str]) \
        -> Tuple[Delta, List[float]]:
    """
    Returns the highest-rated (i.e. best) move from the current board for
    the given player, exploring 'depth' levels to determine the best move.
    'recent_board_history' helps avoid repeating board states.

    Along with the delta object for the best move, also returns a list of
    floats containing the ratings for the series of moves used to rate the
    returned delta. This list is explained more thoroughly in the docs for
    'get_board_ratings'.
    """
    # Evaluate all of the possible moves from this board.
    deltas: List[Delta] = board.get_all_possible_deltas(player)

    # Shuffling the calculated deltas can help avoid endless loops,
    # particularly if board states are being repeated.
    random.shuffle(deltas)

    # Iterate through every valid move, rating each and keeping track of the
    # best move along the way.
    best_delta: Tuple[Delta, List[float]] = (None, [-999999])
    for delta in deltas:
        delta_ratings: List[float] = \
            IDSAgent.get_board_ratings(board.get_next_board(delta),
                                       depth - 1, recent_board_history)

        # The "max" criteria defined by this lambda looks a bit complex, so
        # let's explain it. Keep in mind that floats further to the left in
        # a given list represent the rating of a board further down the game
        # tree. When finding the best move sequence (from the current
        # board's perspective), we prioritize moves that result in the best
        # score at the furthest-down board state, i.e. [5 1 1] is better
        # than [1 9 9], because three moves down it will have a board rated
        # 5 vs the other's board, which is rated 1. If this first value is
        # equal, we prefer the shorter list, i.e. [5 2] is better than
        # [5 3 2], because a shorter list means the game will be over sooner
        # (while still reaching a board rated as highly as the longer list).
        # This helps the algorithm execute killing moves in 'Massacre'
        # instead of putting them off by doing an inconsequential move
        # first. Finally, if the lengths are equal, we prioritize the list
        # with the highest rating at any given index, i.e. [5 3 3] is better
        # than [5 3 2], because it means we're making the moves that keep
        # the board's rating as high as possible (again, only if the
        # comparison gets to this point). Sorted example according to this
        # criteria: [4 3 2] > [3 1] > [3 2 2] > [3 2 1] > [-999].
        # A small standalone demo of this ordering follows this function.
        best_delta = max([best_delta, (delta, delta_ratings)],
                         key=lambda x: (x[1][0], -len(x[1]), x[1]))

    return best_delta
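# A minimal sketch verifying the rating-list ordering described above, using
# the same key as the lambda: primary = rating of the furthest-down board,
# secondary = shorter list wins, tertiary = lexicographic comparison of the
# ratings. The rating lists are the ones from the comment's sorted example.
ratings = [[3, 2, 1], [-999], [4, 3, 2], [3, 2, 2], [3, 1]]
ranked = sorted(ratings, key=lambda r: (r[0], -len(r), r), reverse=True)
# Matches the sorted example: [4 3 2] > [3 1] > [3 2 2] > [3 2 1] > [-999].
assert ranked == [[4, 3, 2], [3, 1], [3, 2, 2], [3, 2, 1], [-999]]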
def parta():
    # Notes on style:
    # This project utilizes type hints, as specified in the PEP 484
    # standard: https://www.python.org/dev/peps/pep-0484/
    # This project utilizes variable annotations, as specified in the
    # PEP 526 standard: https://www.python.org/dev/peps/pep-0526/
    # This, in our opinion, makes the code easier to read, debug, and
    # understand, as types are (almost) always explicitly stated.
    # Additionally, we follow the convention of naming attributes of classes
    # with a leading underscore ('_') if they are not meant to be part of
    # the public interface, i.e. not meant to be used outside of the
    # defining class. These tend to be helper methods for other, larger
    # methods that *are* part of the public interface for that class. This,
    # in our opinion, makes the utilization of classes easier, as it's
    # easier to see which methods to avoid using outside of their defining
    # classes.
    board: Board = Board.create_from_string(1, GamePhase.MOVEMENT)
    mode: str = input()

    if (mode == MOVES):
        print(board.get_num_moves(PlayerColor.WHITE))
        print(board.get_num_moves(PlayerColor.BLACK))
    elif (mode == MASSACRE):
        alpha_beta_agent = IDSAgent(board, 1)
        alpha_beta_agent.massacre()
def setup_all(self):
    player = Player()
    self.setup_player(player)
    self.map_game = Board()
    self.client = Client()
class Player:
    def __init__(self, username):
        self.username = username
        self.board = None

    def check_if_lost(self):
        # The player has lost once every one of their ships is sunk.
        count = 0
        for ship in self.board.ships:
            count += ship.is_ship_sunk(self)
        return count == len(self.board.ships)

    def set_board(self, player_num):
        if player_num == 1:
            self.board = Board(Square(20, 100), Square(380, 460))
        elif player_num == 2:
            self.board = Board(Square(460, 100), Square(820, 460))
        self.board.squares_not_shot_list()
        self.board.available_squares_dict(self)
def __init__(self):
    # Set up players.
    pl1 = Player(1, "Carl", "Red", self)
    pl2 = Player(2, "Naish", "Blue", self)
    pl3 = Player(3, "Mark", "Yellow", self)
    pl4 = Player(4, "Greeny", "Green", self)
    SetupGame.players = [pl1, pl2, pl3, pl4]

    # Set up the game board.
    SetupGame.mainBoard = Board(12, 10)

    # Select the starting player at random, then the next player in order.
    SetupGame.currentPlayer = math.ceil(random() * len(SetupGame.players))
    SetupGame.nextPlayer = \
        SetupGame.currentPlayer % len(SetupGame.players) + 1

    # Set the starting season at random.
    season = math.ceil(random() * 4)
from globals import *
from Classes.Board import Board

# Initialize pygame.
pygame.init()

# Create the window.
sc = pygame.display.set_mode(SCREEN)
sc.fill(WHITE)
clock = pygame.time.Clock()

# Create the board.
board = Board()

# Set the window caption.
pygame.display.set_caption('Arcanoid')

# Game loop.
while 1:
    board.update()
    board.draw(sc)

    # Redraw the screen.
    pygame.display.update()

    # Handle events.
    events = pygame.event.get()
from globals import *
from Classes.Board import Board

# Initialize pygame.
pygame.init()

# Create the window.
sc = pygame.display.set_mode(SCREEN)
sc.fill(WHITE)
clock = pygame.time.Clock()

# Create the board.
board = Board()

# Set the window caption.
pygame.display.set_caption('Tetris')

# Game loop.
while 1:
    board.update()
    board.draw(sc)

    # Redraw the screen.
    pygame.display.update()

    # Handle events.
    events = pygame.event.get()
class AlphaBetaAgent():
    _board: Board
    _node: Node
    _init_node: Node = Node(None, None)

    def __init__(self, start_board: Board = None,
                 seed: int = random.randint(0, 999999)):
        if (start_board is None):
            self._board = Board(None, 1, GamePhase.PLACEMENT)
        else:
            self._board = start_board
        self._node = self._init_node
        random.seed(seed)

    def run(self):
        print(self._board)

        # 'is_maximizer' describes the role of the player at the *child*
        # boards: when the children are evaluated as the minimizer, the
        # current player is the maximizer and takes the max, and vice versa.
        is_maximizer: bool = False
        while (self._board.phase != GamePhase.FINISHED):
            deltas: List[Delta] = self._board.get_all_possible_deltas(
                Utils.get_player(self._board.round_num))
            delta_scores: List[Tuple[Delta, float]] = []
            for delta in deltas:
                delta_scores.append(
                    (delta, AlphaBetaAgent.alphabeta(
                        self._board.get_next_board(delta),
                        Node(self._node, delta), 2, -9999, 9999,
                        is_maximizer)))

            if (len(set([delta_score[1]
                         for delta_score in delta_scores])) == 1):
                # All moves are rated equally; pick a random one.
                best_delta: Tuple[Delta, float] = random.choice(delta_scores)
            elif not is_maximizer:
                best_delta: Tuple[Delta, float] = max(delta_scores,
                                                      key=lambda x: x[1])
            else:
                best_delta: Tuple[Delta, float] = min(delta_scores,
                                                      key=lambda x: x[1])

            self._board = self._board.get_next_board(best_delta[0])
            self._node = Node(self._node, best_delta[0])
            is_maximizer = not is_maximizer

            print("{:3}: {} ({})".format(self._board.round_num - 1,
                                         best_delta[0], best_delta[1]))
            print(self._board)

    @staticmethod
    def alphabeta(board: Board, node: Node, depth: int, alpha: float,
                  beta: float, is_maximizer: bool) -> float:
        if (depth == 0 or board.phase == GamePhase.FINISHED):
            return AlphaBetaAgent.get_heuristic_value(board)

        if (is_maximizer):
            v: float = -999999
            deltas: List[Delta] = board.get_all_possible_deltas(
                Utils.get_player(board.round_num))
            for delta in deltas:
                child_node: Node = Node(node, delta)
                v = max(v, AlphaBetaAgent.alphabeta(
                    board.get_next_board(delta), child_node, depth - 1,
                    alpha, beta, False))
                alpha = max(alpha, v)
                if (beta <= alpha):
                    break
            return v
        else:
            v = 999999
            deltas: List[Delta] = board.get_all_possible_deltas(
                Utils.get_player(board.round_num))
            for delta in deltas:
                child_node: Node = Node(node, delta)
                v = min(v, AlphaBetaAgent.alphabeta(
                    board.get_next_board(delta), child_node, depth - 1,
                    alpha, beta, True))
                beta = min(beta, v)
                if beta <= alpha:
                    break
            return v

    @staticmethod
    def get_heuristic_value(board: Board):
        num_white_pieces: int = len(
            board._get_player_squares(PlayerColor.WHITE))
        num_black_pieces: int = len(
            board._get_player_squares(PlayerColor.BLACK))
        return num_white_pieces - num_black_pieces
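# A minimal, self-contained sketch of the same alpha-beta recursion on a
# hand-built tree, useful for sanity-checking the pruning logic without the
# Board/Delta machinery. The tree and its values are illustrative only.
def alphabeta_demo(node, depth, alpha, beta, is_maximizer):
    # Leaves are plain numbers; internal nodes are lists of children.
    if depth == 0 or not isinstance(node, list):
        return node
    if is_maximizer:
        v = -999999
        for child in node:
            v = max(v, alphabeta_demo(child, depth - 1, alpha, beta, False))
            alpha = max(alpha, v)
            if beta <= alpha:
                break  # Beta cutoff: the minimizer above won't allow this line.
        return v
    else:
        v = 999999
        for child in node:
            v = min(v, alphabeta_demo(child, depth - 1, alpha, beta, True))
            beta = min(beta, v)
            if beta <= alpha:
                break  # Alpha cutoff: the maximizer above won't allow this line.
        return v

# Classic two-ply tree: the maximizer chooses among three minimizer nodes
# and can guarantee a value of 3; the 9 and the 1 are never even visited.
tree = [[3, 5], [2, 9], [0, 1]]
assert alphabeta_demo(tree, 2, -999999, 999999, True) == 3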
class Player():
    # --- Heuristic Weights ---
    # TODO: Consider if we should weigh own player's pieces higher than the
    # enemy's.
    _OWN_PIECE_WEIGHT: float
    _OPPONENT_PIECE_WEIGHT: float
    # Don't want to prioritize mobility over pieces, so it's much smaller.
    _OWN_MOBILITY_WEIGHT: float
    _OPPONENT_MOBILITY_WEIGHT: float
    # TODO: How to balance cohesiveness and mobility? They're opposing, in
    # a way.
    _OWN_DIVIDED_WEIGHT: float  # Bad to be divided. Want to be cohesive!
    _OPPONENT_DIVIDED_WEIGHT: float  # Good for the opponent to be divided.
    _OWN_NON_CENTRALITY_WEIGHT: float
    _OPPONENT_NON_CENTRALITY_WEIGHT: float

    # Heuristic score decimal place rounding. Used to prevent floating point
    # imprecision from interfering with move decisions.
    _RATING_NUM_ROUNDING: int = 10

    _ALPHA_START_VALUE: int = -9999
    _BETA_START_VALUE: int = 9999
    _SEED: int = 13373

    # A reference to the current board that the agent is on.
    _board: Board
    _color: PlayerColor

    # The depth to go to in each iteration of the iterative-deepening search
    # algorithm, i.e. the number of moves to look ahead.
    _depth: int = 1

    def __init__(self, color: str, parameters: List[float]):
        """
        TODO
        This method is called by the referee once at the beginning of the
        game to initialise your player. You should use this opportunity to
        set up your own internal representation of the board, and any other
        state you would like to maintain for the duration of the game.

        The input parameter colour is a string representing the piece colour
        your program will control for this game. It can take one of only two
        values: the string 'white' (if you are the White player for this
        game) or the string 'black' (if you are the Black player for this
        game).
        """
        self.parameters = parameters
        self._board = Board(None, 0, GamePhase.PLACEMENT)
        if (color.lower() == "white"):
            self._color = PlayerColor.WHITE
        else:
            self._color = PlayerColor.BLACK

        random.seed(Player._SEED)

    def action(self, turns) -> Union[str, None]:
        """
        This method is called by the referee to request an action by your
        player.

        The input parameter turns is an integer representing the number of
        turns that have taken place since the start of the current game
        phase. For example, if White player has already made 11 moves in the
        moving phase, and Black player has made 10 moves (and the referee is
        asking for its 11th move), then the value of turns would be 21.

        Based on the current state of the board, your player should select
        its next action and return it. Your player should represent this
        action based on the instructions below, in the ‘Representing
        actions’ section.
        """
        deltas: List[Delta] = self._board.get_all_possible_deltas(
            self._color)
        if (len(deltas) == 0):
            return None

        delta_scores: Dict[Delta, float] = {}
        for delta in deltas:
            delta_scores[delta] = \
                Player.get_alpha_beta_value(
                    self._board.get_next_board(delta), Player._depth - 1,
                    Player._ALPHA_START_VALUE, Player._BETA_START_VALUE,
                    self._color, self.parameters)

        # During the placement phase, filter out suicidal placements.
        if self._board.round_num > 0 and \
                self._board.phase == GamePhase.PLACEMENT:
            delta_scores = {k: v for k, v in delta_scores.items()
                            if (not self._board.is_suicide(k))}

        best_deltas: List[Delta] = Utils.get_best_deltas(delta_scores,
                                                         self._color)
        best_delta: Tuple[Delta, float]
        if (len(best_deltas) > 1):
            # There is more than one "best" delta. Pick a random one.
            best_delta = random.choice(list(best_deltas))
        else:
            best_delta = best_deltas[0]

        self._board = self._board.get_next_board(best_delta[0])

        # if self._color == PlayerColor.WHITE and self.parameters == [1, -1, 0.01, -0.01]:
        #     print([(str(delta), score) for delta, score in delta_scores.items()])
        # print("{} {} DOES {} [{}]".format(self.parameters, self._color, best_delta[0], best_delta[1]))
        return best_delta[0].get_referee_form()

    def update(self, action: Tuple[Union[int, Tuple[int]]]):
        """
        This method is called by the referee to inform your player about the
        opponent’s most recent move, so that you can maintain your internal
        board configuration.

        The input parameter action is a representation of the opponent’s
        recent action based on the instructions below, in the ‘Representing
        actions’ section.

        This method should not return anything.

        Note: update() is only called to notify your player about the
        opponent’s actions. Your player will not be notified about its own
        actions.

        - To represent the action of placing a piece on square (x,y), use a
          tuple (x,y).
        - To represent the action of moving a piece from square (a,b) to
          square (c,d), use a nested tuple ((a,b),(c,d)).
        - To represent a forfeited turn, use the value None.
        """
        # TODO
        # The easiest way to generate a Delta from 'action' seems to be to
        # use board.get_valid_movements or board.get_valid_placements and
        # then "getting" the Delta being made by matching the Pos2Ds.

        if (action is None):
            # Opponent forfeited their turn.
            self._board.round_num += 1
            self._board._update_game_phase()
            return

        positions: List[Pos2D]
        if (type(action[0]) == int):
            positions = [Pos2D(action[0], action[1])]
        else:
            positions = [Pos2D(x, y) for x, y in action]

        opponent_delta: Delta = None
        deltas: List[Delta]
        if (len(positions) == 1):
            # Placement.
            assert (self._board.phase == GamePhase.PLACEMENT)
            deltas = self._board.get_possible_placements(
                self._color.opposite())
            for delta in deltas:
                if delta.move_target.pos == positions[0]:
                    opponent_delta = delta
                    break
        elif (len(positions) == 2):
            # Movement.
            try:
                assert (self._board.phase == GamePhase.MOVEMENT)
            except AssertionError:
                print("WARNING: 'assert(self._board.phase == "
                      "GamePhase.MOVEMENT)' FAILED.")
                print("SETTING PHASE = GAMEPHASE.MOVEMENT.")
                self._board.phase = GamePhase.MOVEMENT
            deltas = self._board.get_possible_moves(positions[0])
            for delta in deltas:
                if delta.move_target.pos == positions[1]:
                    opponent_delta = delta
                    break

        assert (opponent_delta is not None)
        self._board = self._board.get_next_board(opponent_delta)

    @staticmethod
    def get_alpha_beta_value(board: Board, depth: int, alpha: float,
                             beta: float, color: PlayerColor,
                             parameters: List[float]) -> float:
        if (depth == 0 or board.phase == GamePhase.FINISHED):
            return Player.get_heuristic_value(board, color, parameters)

        if (color == PlayerColor.WHITE):
            # Maximizer.
            v: float = -999999
            deltas: List[Delta] = board.get_all_possible_deltas(color)
            for delta in deltas:
                v = max(v, Player.get_alpha_beta_value(
                    board.get_next_board(delta), depth - 1, alpha, beta,
                    color.opposite(), parameters))
                alpha = max(alpha, v)
                if (beta <= alpha):
                    break
            return v
        else:
            # Minimizer.
            v = 999999
            deltas: List[Delta] = board.get_all_possible_deltas(color)
            for delta in deltas:
                v = min(v, Player.get_alpha_beta_value(
                    board.get_next_board(delta), depth - 1, alpha, beta,
                    color.opposite(), parameters))
                beta = min(beta, v)
                if (beta <= alpha):
                    break
            return v

    @staticmethod
    def get_heuristic_value(board: Board, player: PlayerColor,
                            parameters: List[float]):
        """
        Given a board, calculates and returns its rating based on
        heuristics.
        """
        player_squares: List[Square] = board.get_player_squares(player)
        opponent_squares: List[Square] = board.get_player_squares(
            player.opposite())

        # -- Num pieces --
        # Calculate the number of own and opponent pieces. This is a very
        # important heuristic that helps prioritize preserving the player's
        # own pieces and killing the enemy's pieces.
        num_own_pieces: int = len(player_squares)
        num_opponent_pieces: int = len(opponent_squares)

        # -- Mobility --
        # Calculate the mobility for both players, i.e. the number of
        # possible moves they can make.
        own_mobility: int = board.get_num_moves(player)
        opponent_mobility: int = board.get_num_moves(player.opposite())

        # -- Cohesiveness --
        # Sum the pairwise Manhattan distances between allied pieces.
        own_total_distance: int = 0
        opponent_total_distance: int = 0
        displacement: Pos2D
        for idx, square in enumerate(player_squares):
            for square2 in player_squares[idx + 1:]:
                displacement = square.pos - square2.pos
                own_total_distance += \
                    abs(displacement.x) + abs(displacement.y)
        for idx, square in enumerate(opponent_squares):
            for square2 in opponent_squares[idx + 1:]:
                displacement = square.pos - square2.pos
                opponent_total_distance += \
                    abs(displacement.x) + abs(displacement.y)
        own_avg_allied_distance: float = \
            own_total_distance / (num_own_pieces + 1)
        opponent_avg_allied_distance: float = \
            opponent_total_distance / (num_opponent_pieces + 1)

        # -- Centrality --
        # Sum each piece's Manhattan distance from the center of the board
        # (3.5, 3.5).
        own_total_distance = 0
        opponent_total_distance = 0
        x_displacement: float
        y_displacement: float
        for square in player_squares:
            x_displacement = 3.5 - square.pos.x
            y_displacement = 3.5 - square.pos.y
            own_total_distance += abs(x_displacement) + abs(y_displacement)
        for square in opponent_squares:
            x_displacement = 3.5 - square.pos.x
            y_displacement = 3.5 - square.pos.y
            opponent_total_distance += \
                abs(x_displacement) + abs(y_displacement)
        own_avg_center_distance: float = \
            own_total_distance / (num_own_pieces + 1)
        opponent_avg_center_distance: float = \
            opponent_total_distance / (num_opponent_pieces + 1)

        # Calculate the heuristic score/rating.
        rounded_heuristic_score: float = round(
            parameters[0] * num_own_pieces +
            parameters[1] * num_opponent_pieces +
            parameters[2] * own_mobility +
            parameters[3] * opponent_mobility +
            parameters[4] * own_avg_allied_distance +
            parameters[5] * opponent_avg_allied_distance +
            parameters[6] * own_avg_center_distance +
            parameters[7] * opponent_avg_center_distance,
            Player._RATING_NUM_ROUNDING)

        # Return the score as is or negated, depending on the player.
        # For white, return as is. For black, negate.
        return rounded_heuristic_score if player == PlayerColor.WHITE \
            else -rounded_heuristic_score
class MCTSAgent():
    """
    Contains the main driver functions used for this AI. Contains functions
    that, together, implement the Monte Carlo Tree Search algorithm.
    """

    _EXPLORATION_MULTIPLIER: float = sqrt(2)

    # A reference to the root node in the tree that's being searched by
    # MCTS.
    tree_root: Node
    _board: Board
    _init_board: Board = Board(None, 1, GamePhase.PLACEMENT)

    def __init__(self, tree_root: Node, start_board: Board = _init_board,
                 seed: int = None):
        self.tree_root = tree_root
        self._board = start_board

        if (seed is not None):
            random.seed(seed)

    def train(self, duration_seconds: int):
        end_time: float = time.time() + duration_seconds
        while (time.time() < end_time):
            print(self.tree_root.wins, self.tree_root.num_simulations)  # TODO Remove
            self._simulate()
            self._board = self._init_board
            # break  # TODO Remove
        print(self.tree_root.wins, self.tree_root.num_simulations)  # TODO Remove

    def _select(self, node: Node, total_num_simulations: int) -> Node:
        scores: List[Tuple[Node, float]] = []
        # The score given to any as-yet-unexplored move: treated as one win
        # from two simulations.
        unexplored_nodes_score: float = Utils.UCB1(
            1, 2, total_num_simulations, MCTSAgent._EXPLORATION_MULTIPLIER)

        # A list of all deltas which have already been explored at least
        # once. Therefore, they are nodes.
        children: List[Node] = node.children
        # A list of all valid deltas from the given board.
        deltas: List[Delta] = self._board.get_all_possible_deltas(
            Utils.get_player(self._board.round_num))

        if (len(children) > 0):
            for child in children:
                # Since some deltas have already been explored and are
                # therefore included in 'children', remove them from
                # 'deltas' so that it only contains unexplored moves.
                deltas.remove(child.delta)
                scores.append((child, Utils.UCB1(
                    child.wins, child.num_simulations,
                    total_num_simulations,
                    MCTSAgent._EXPLORATION_MULTIPLIER)))

        # Since there are no unexplored options available, set the score to
        # -1 so that the algorithm won't attempt to choose an unexplored
        # option (since there are none).
        if len(deltas) == 0:
            unexplored_nodes_score = -1

        # Order by highest-scoring nodes.
        scores = sorted(scores, key=lambda x: x[1], reverse=True)
        for child, score in scores:
            if (score > unexplored_nodes_score):
                # This is to avoid re-exploring a leaf node that resulted in
                # a win or loss. We want to explore new options. Otherwise
                # we'd have wasted this simulation or back-propagated the
                # same result twice.
                if (self._board.get_next_board(
                        child.delta).phase == GamePhase.FINISHED):
                    continue
                else:
                    return child
            else:
                # We've now reached a (node : score) pair that has a lower
                # score than all the unexplored moves. Therefore, stop
                # iterating through existing nodes so we can instead select
                # an unexplored move.
                break

        random_delta: Delta = random.choice(deltas)
        new_child_node: Node = Node(node, random_delta)
        node.children.append(new_child_node)

        return new_child_node

    def _simulate(self):
        leaf: Node = self._select(self.tree_root,
                                  self.tree_root.num_simulations)
        self._board = self._board.get_next_board(leaf.delta)
        while (self._board.phase != GamePhase.FINISHED):
            leaf = self._select(leaf, self.tree_root.num_simulations)
            self._board = self._board.get_next_board(leaf.delta)

            if (self._board.round_num != 1):  # TODO Remove this debug output.
                selection = "({}, {}) -> NODE" if leaf.num_simulations > 2 \
                    else "({}, {}) -> EXPLORE"
                print("{:3}: {} : {}".format(
                    self._board.round_num - 1, leaf.delta.player,
                    selection.format(leaf.wins, leaf.num_simulations)))
                if (leaf.delta.move_origin is not None):
                    print("{} -> ".format(leaf.delta.move_origin.pos),
                          end="")
                print("{}".format(leaf.delta.move_target.pos))
                print(self._board)
                print("")

        self._back_propagate(leaf, self._board.winner)

    def _back_propagate(self, node: Node, winner: PlayerColor):
        """
        Propagates a simulation result from the given leaf back up to the
        root. This could be done recursively, but there's no point in using
        a stack; doing it iteratively is more memory efficient.
        """
        while (node is not None):
            node.num_simulations += 1
            if ((node.parent is None
                    and node.children[0].delta.player == winner)
                    or (node.delta is not None
                        and node.delta.player == winner)):
                node.wins += 1
            elif (winner is None):
                # Must have been a tie.
                node.wins += 0.5

            # TODO Remove this (temp for printing/debugging)
            # if (node.board.round_num != 1):
            #     print("{:3}: {}: ".format(node.board.round_num - 1, node.delta.player), end="")
            #     if (node.delta.move_origin is not None):
            #         print("{} -> ".format(node.delta.move_origin.pos), end="")
            #     print("{}".format(node.delta.move_target.pos))
            #
            #     print(node.board)
            #     print("")

            node = node.parent
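# Utils.UCB1 isn't shown in this section. A minimal sketch, under the
# assumption that it computes the standard UCB1 formula
# w/n + c * sqrt(ln(N) / n), where w = wins, n = this node's simulations,
# N = total simulations, and c = the exploration multiplier:
from math import log, sqrt

def ucb1_sketch(wins: float, num_simulations: int,
                total_num_simulations: int, exploration: float) -> float:
    # Exploitation term (win rate) plus an exploration bonus that shrinks
    # as the node gets visited more often.
    return (wins / num_simulations
            + exploration * sqrt(log(total_num_simulations)
                                 / num_simulations))

# e.g. a node with 1 win from 2 visits out of 10 total simulations:
# 0.5 + sqrt(2) * sqrt(ln(10) / 2) ~= 2.0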
from Classes.Die import Die
from Classes.Board import Board

# Initialize all dice in the game.
redDie = Die()
yellowDie = Die()
greenDie = Die()
blueDie = Die()
whiteDie1 = Die()
whiteDie2 = Die()

board1 = Board()
print(board1.board)
board1.checkBox('red', 6)
board1.checkBox('red', 7)
board1.checkBox('red', 8)
board1.checkBox('red', 9)
board1.checkBox('red', 12)
print(board1.board)
print(board1.canLockRow(0))
print(board1.scoreBoard())
import pygame
import sys
import time
from pygame.locals import *
from Classes.Board import Board
from globals import *

pygame.init()

# Variables.
pyTime = pygame.time.Clock()
initializeTime = time.time()
board = Board()
sc = pygame.display.set_mode((W, H))

# Functions.
def onKeyDown(event):
    # Player 1.
    if (event.key == K_DOWN):
        board.player1.changeDir(False)
    if (event.key == K_UP):
        board.player1.changeDir(True)
    # Player 2.
    if (event.key == K_w):
        board.player2.changeDir(True)
class Player():
    # --- Heuristic Weights ---
    # TODO: Consider if we should weigh own player's pieces higher than the
    # enemy's.
    _OWN_PIECE_WEIGHT: float = 1
    _OPPONENT_PIECE_WEIGHT: float = -1
    # Don't want to prioritize mobility over pieces, so it's much smaller.
    _OWN_MOBILITY_WEIGHT: float = 0.01
    _OPPONENT_MOBILITY_WEIGHT: float = -0.01
    # TODO: How to balance cohesiveness and mobility? They're opposing, in
    # a way.
    _OWN_DIVIDED_WEIGHT: float = -0.001  # Bad to be divided. Want to be cohesive!
    _OPPONENT_DIVIDED_WEIGHT: float = 0.001  # Good for the opponent to be divided.
    _OWN_NON_CENTRALITY_WEIGHT: float = -0.005
    _OPPONENT_NON_CENTRALITY_WEIGHT: float = 0.005

    # Heuristic score decimal place rounding. Used to prevent floating point
    # imprecision from interfering with move decisions.
    _RATING_NUM_ROUNDING: int = 10

    # --- Timer parameters ---
    # The total time for the player in the game.
    _TIME_LIMIT: float = 120.0
    # The amount of time remaining at which point the AI will start picking
    # moves completely randomly so as to not run out of time.
    _PANIC_MODE_REMAINING_TIME: float = 2.0
    # The number of rounds expected to be played. Includes placement rounds
    # and all rounds until around the 2nd deathzone.
    _NUM_EXPECTED_ROUNDS: int = 24 + 194
    _DEPTH_TWO_EXPECTED_TURN_TIME: float = 200

    # --- Other parameters ---
    _ALPHA_START_VALUE: int = -9999
    _BETA_START_VALUE: int = 9999
    _SEED: int = 1337

    # --- Instance variables ---
    # A reference to the current board that the agent is on.
    _timer: Timer
    _board: Board
    _color: PlayerColor

    def __init__(self, color: str):
        """
        TODO
        This method is called by the referee once at the beginning of the
        game to initialise your player. You should use this opportunity to
        set up your own internal representation of the board, and any other
        state you would like to maintain for the duration of the game.

        The input parameter colour is a string representing the piece colour
        your program will control for this game. It can take one of only two
        values: the string 'white' (if you are the White player for this
        game) or the string 'black' (if you are the Black player for this
        game).
        """
        self._board = Board(None, 0, GamePhase.PLACEMENT)
        if (color.lower() == "white"):
            self._color = PlayerColor.WHITE
        else:
            self._color = PlayerColor.BLACK

        random.seed(Player._SEED)
        self._timer = Timer(Player._TIME_LIMIT)

    def action(self, turns) -> Union[str, None]:
        """
        This method is called by the referee to request an action by your
        player.

        The input parameter turns is an integer representing the number of
        turns that have taken place since the start of the current game
        phase. For example, if White player has already made 11 moves in the
        moving phase, and Black player has made 10 moves (and the referee is
        asking for its 11th move), then the value of turns would be 21.

        Based on the current state of the board, your player should select
        its next action and return it. Your player should represent this
        action based on the instructions below, in the ‘Representing
        actions’ section.
        """
        with self._timer:
            deltas: List[Delta] = self._board.get_all_possible_deltas(
                self._color)
            if (len(deltas) == 0):
                return None

            remaining_time: float = self._timer.limit - self._timer.clock
            if (remaining_time < Player._PANIC_MODE_REMAINING_TIME):
                # AHH! Not much time remaining - pick a random move.
                print(self._color, "PANIC")
                random_delta: Delta = random.choice(deltas)
                self._board = self._board.get_next_board(random_delta)
                return random_delta.get_referee_form()

            # Determine the search depth based on the amount of time
            # remaining.
            depth: int
            remaining_expected_rounds: int = \
                Player._NUM_EXPECTED_ROUNDS - self._board.round_num
            remaining_expected_time_per_round: float = \
                remaining_time / (remaining_expected_rounds + 10)
            if (remaining_expected_time_per_round >
                    Player._DEPTH_TWO_EXPECTED_TURN_TIME):
                depth = 2
            else:
                depth = 1
            print("Looking {} moves ahead!".format(depth))

            delta_scores: Dict[Delta, float] = {}
            for delta in deltas:
                delta_scores[delta] = \
                    Player.get_alpha_beta_value(
                        self._board.get_next_board(delta), depth - 1,
                        Player._ALPHA_START_VALUE,
                        Player._BETA_START_VALUE, self._color)

            best_deltas: List[Delta] = Utils.get_best_deltas(delta_scores,
                                                             self._color)
            best_delta: Tuple[Delta, float]
            if (len(best_deltas) > 1):
                # There is more than one "best" delta. Pick a random one.
                best_delta = random.choice(list(best_deltas))
            else:
                best_delta = best_deltas[0]

            self._board = self._board.get_next_board(best_delta[0])

            print(self._color, "DOES", best_delta[0],
                  "[{}]".format(best_delta[1]))
            return best_delta[0].get_referee_form()

    def update(self, action: Tuple[Union[int, Tuple[int]]]):
        """
        This method is called by the referee to inform your player about the
        opponent’s most recent move, so that you can maintain your internal
        board configuration.

        The input parameter action is a representation of the opponent’s
        recent action based on the instructions below, in the ‘Representing
        actions’ section.

        This method should not return anything.

        Note: update() is only called to notify your player about the
        opponent’s actions. Your player will not be notified about its own
        actions.

        - To represent the action of placing a piece on square (x,y), use a
          tuple (x,y).
        - To represent the action of moving a piece from square (a,b) to
          square (c,d), use a nested tuple ((a,b),(c,d)).
        - To represent a forfeited turn, use the value None.
        """
        # TODO
        # The easiest way to generate a Delta from 'action' seems to be to
        # use board.get_valid_movements or board.get_valid_placements and
        # then "getting" the Delta being made by matching the Pos2Ds.
        with self._timer:
            print(self._color, "SEES", action)

            if (action is None):
                # Opponent forfeited their turn.
                self._board.round_num += 1
                self._board._update_game_phase()
                return

            positions: List[Pos2D]
            if (type(action[0]) == int):
                positions = [Pos2D(action[0], action[1])]
            else:
                positions = [Pos2D(x, y) for x, y in action]

            opponent_delta: Delta = None
            deltas: List[Delta]
            if (len(positions) == 1):
                # Placement.
                assert (self._board.phase == GamePhase.PLACEMENT)
                deltas = self._board.get_possible_placements(
                    self._color.opposite())
                for delta in deltas:
                    if delta.move_target.pos == positions[0]:
                        opponent_delta = delta
                        break
            elif (len(positions) == 2):
                # Movement.
                assert (self._board.phase == GamePhase.MOVEMENT)
                deltas = self._board.get_possible_moves(positions[0])
                for delta in deltas:
                    if delta.move_target.pos == positions[1]:
                        opponent_delta = delta
                        break

            assert (opponent_delta is not None)
            self._board = self._board.get_next_board(opponent_delta)

    @staticmethod
    def get_alpha_beta_value(board: Board, depth: int, alpha: float,
                             beta: float, color: PlayerColor) -> float:
        if (depth == 0 or board.phase == GamePhase.FINISHED):
            return Player.get_heuristic_value(board, color)

        if (color == PlayerColor.WHITE):
            # Maximizer.
            v: float = Player._ALPHA_START_VALUE
            deltas: List[Delta] = board.get_all_possible_deltas(color)
            for delta in deltas:
                v = max(v, Player.get_alpha_beta_value(
                    board.get_next_board(delta), depth - 1, alpha, beta,
                    color.opposite()))
                alpha = max(alpha, v)
                if (beta <= alpha):
                    break
            return v
        else:
            # Minimizer.
            v = Player._BETA_START_VALUE
            deltas: List[Delta] = board.get_all_possible_deltas(color)
            for delta in deltas:
                v = min(v, Player.get_alpha_beta_value(
                    board.get_next_board(delta), depth - 1, alpha, beta,
                    color.opposite()))
                beta = min(beta, v)
                if (beta <= alpha):
                    break
            return v

    @staticmethod
    def get_heuristic_value(board: Board, player: PlayerColor):
        """
        Given a board, calculates and returns its rating based on
        heuristics.
        """
        player_squares: List[Square] = board.get_player_squares(player)
        opponent_squares: List[Square] = board.get_player_squares(
            player.opposite())

        # -- Num pieces --
        # Calculate the number of own and opponent pieces. This is a very
        # important heuristic that helps prioritize preserving the player's
        # own pieces and killing the enemy's pieces.
        num_own_pieces: int = len(player_squares)
        num_opponent_pieces: int = len(opponent_squares)

        # -- Mobility --
        # Calculate the mobility for both players, i.e. the number of
        # possible moves they can make.
        own_mobility: int = board.get_num_moves(player)
        opponent_mobility: int = board.get_num_moves(player.opposite())

        # -- Cohesiveness --
        # Sum the pairwise Manhattan distances between allied pieces.
        own_total_distance: int = 0
        opponent_total_distance: int = 0
        displacement: Pos2D
        for idx, square1 in enumerate(player_squares):
            for square2 in player_squares[idx + 1:]:
                displacement = square1.pos - square2.pos
                own_total_distance += \
                    abs(displacement.x) + abs(displacement.y)
        for idx, square1 in enumerate(opponent_squares):
            for square2 in opponent_squares[idx + 1:]:
                displacement = square1.pos - square2.pos
                opponent_total_distance += \
                    abs(displacement.x) + abs(displacement.y)
        own_avg_allied_distance: float = \
            own_total_distance / (num_own_pieces + 1)
        opponent_avg_allied_distance: float = \
            opponent_total_distance / (num_opponent_pieces + 1)

        # -- Centrality --
        # Sum each piece's Manhattan distance from the center of the board
        # (3.5, 3.5).
        own_total_distance = 0
        opponent_total_distance = 0
        x_displacement: float
        y_displacement: float
        for square in player_squares:
            x_displacement = 3.5 - square.pos.x
            y_displacement = 3.5 - square.pos.y
            own_total_distance += abs(x_displacement) + abs(y_displacement)
        for square in opponent_squares:
            x_displacement = 3.5 - square.pos.x
            y_displacement = 3.5 - square.pos.y
            opponent_total_distance += \
                abs(x_displacement) + abs(y_displacement)
        own_avg_center_distance: float = \
            own_total_distance / (num_own_pieces + 1)
        opponent_avg_center_distance: float = \
            opponent_total_distance / (num_opponent_pieces + 1)

        # Calculate the heuristic score/rating.
        rounded_heuristic_score: float = round(
            Player._OWN_PIECE_WEIGHT * num_own_pieces +
            Player._OPPONENT_PIECE_WEIGHT * num_opponent_pieces +
            Player._OWN_MOBILITY_WEIGHT * own_mobility +
            Player._OPPONENT_MOBILITY_WEIGHT * opponent_mobility +
            Player._OWN_DIVIDED_WEIGHT * own_avg_allied_distance +
            Player._OPPONENT_DIVIDED_WEIGHT * opponent_avg_allied_distance +
            Player._OWN_NON_CENTRALITY_WEIGHT * own_avg_center_distance +
            Player._OPPONENT_NON_CENTRALITY_WEIGHT *
            opponent_avg_center_distance,
            Player._RATING_NUM_ROUNDING)

        # Return the score as is or negated, depending on the player.
        # For white, return as is. For black, negate.
        return rounded_heuristic_score if player == PlayerColor.WHITE \
            else -rounded_heuristic_score
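# The Timer used by the Player above isn't shown in this section. A minimal
# sketch, under the assumption that it's a context manager that accumulates
# the wall-clock time of each 'with' block into 'clock' and exposes the
# total budget as 'limit' (the two attributes read by Player.action):
import time

class Timer:
    def __init__(self, limit: float):
        self.limit = limit  # Total time budget, in seconds.
        self.clock = 0.0    # Time consumed so far, in seconds.
        self._start = 0.0

    def __enter__(self):
        self._start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Accumulate the time spent inside the 'with' block.
        self.clock += time.time() - self._start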