import math
import random

def minimax(self, board, maximize=True, depth=4):
    # Base case: the game is over or the search depth is exhausted
    if self.terminal(board) or depth == 0:
        return (None, self.score(board))

    # Maximizing player: pick the action with the highest score
    if maximize:
        best_score = -math.inf
        all_actions = Barasingga.available_actions(board, self.player)
        best_action = random.choice(all_actions)
        for action in all_actions:
            new = Barasingga.result(board, action)
            score = self.minimax(new, maximize=False, depth=depth - 1)[1]
            if score > best_score:
                best_score = score
                best_action = action
        return (best_action, best_score)

    # Minimizing player: pick the action with the lowest score
    else:
        best_score = math.inf
        all_actions = Barasingga.available_actions(board, self.other)
        best_action = random.choice(all_actions)
        for action in all_actions:
            new = Barasingga.result(board, action)
            # Recurse with the maximizing player to move next
            score = self.minimax(new, maximize=True, depth=depth - 1)[1]
            if score < best_score:
                best_score = score
                best_action = action
        return (best_action, best_score)
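# Usage sketch: how a caller might query the minimax AI for a move. The
# Barasingga game class is defined elsewhere in this project; the exact way
# the UI invokes minimax is an assumption, only the constructor and method
# signatures come from the code above.
game = Barasingga()
ai = BarasinggaAI(player=2, depth=3)
move, score = ai.minimax(game.board, maximize=True, depth=ai.depth)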
def train(n):
    """Train a Q-learning AI by having it play `n` games against itself."""
    player = BarasinggaQlearning()

    for i in range(n):
        print(f"Training game {i + 1}")
        game = Barasingga()

        # Keep track of the last (state, action) pair for each player
        last = {
            1: {"state": None, "action": None},
            2: {"state": None, "action": None}
        }

        # Game loop
        while True:
            # Record the current state and the chosen action
            state = (tuple_board(game.board), game.player)
            action = player.choose_action(state)

            # Remember this player's last state and action
            last[game.player]["state"] = state
            last[game.player]["action"] = action

            # Make the move; game.player switches to the opponent
            game.move(action)
            new_state = (tuple_board(game.board), game.player)

            # When the game ends, update Q-values with terminal rewards:
            # -1 for the final move, +1 for the other player's last move
            if game.winner is not None or game.over:
                player.update(state, action, new_state, -1)
                player.update(
                    last[game.player]["state"],
                    last[game.player]["action"],
                    new_state,
                    1
                )
                break

            # While the game continues, there is no reward yet
            elif last[game.player]["state"] is not None:
                player.update(
                    last[game.player]["state"],
                    last[game.player]["action"],
                    new_state,
                    0
                )

    print("Done training")

    # Return the trained AI
    return player
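# Usage sketch: train over a chosen number of self-play games, then query the
# returned AI greedily (epsilon=False disables exploration). The game count
# of 10000 is an arbitrary example value, not taken from this project.
ai = train(10000)
game = Barasingga()
state = (tuple_board(game.board), game.player)
move = ai.choose_action(state, epsilon=False)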
def choose_action(self, state, epsilon=True):
    """
    Given a state `state`, return an action `(i, j)` to take.

    If `epsilon` is `False`, return the best action available in the
    state (the one with the highest Q-value, using 0 for pairs that
    have no Q-value).

    If `epsilon` is `True`, choose a random available action with
    probability `self.epsilon`; otherwise choose the best action.

    If multiple actions have the same Q-value, any of those options
    is an acceptable return value.
    """
    tuple_board, player = state
    board = self.list_board(tuple_board)
    actions = Barasingga.available_actions(board, player)

    # With probability self.epsilon, explore a random action
    if epsilon and random.random() < self.epsilon:
        return random.choice(actions)

    # Otherwise pick the action with the highest Q-value, treating
    # unseen (state, action) pairs as 0
    best_action = actions[0]
    best_q = -math.inf
    for action in actions:
        q = self.q.get((state, action), 0)
        if q > best_q:
            best_q = q
            best_action = action
    return best_action
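# The Q-learning code stores boards as tuples so that (state, action) pairs
# can serve as dictionary keys in self.q (dict keys must be hashable, and
# lists are not). A minimal sketch of the two converters referenced above,
# assuming the board is a 2D grid; they are defined elsewhere in this
# project, and in the class list_board appears as a method.
def tuple_board(board):
    # Tuples are hashable, so the board can appear inside a dict key
    return tuple(tuple(row) for row in board)

def list_board(tuple_board):
    # Restore the mutable list-of-lists form the game logic expects
    return [list(row) for row in tuple_board]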
def best_future_reward(self, state):
    """
    Given a state `state`, consider all possible `(state, action)`
    pairs available in that state and return the maximum of their
    Q-values.

    Use 0 as the Q-value if a `(state, action)` pair has no Q-value
    in `self.q`. If there are no available actions in `state`,
    return 0.
    """
    tuple_board, player = state
    board = self.list_board(tuple_board)
    actions = Barasingga.available_actions(board, player)

    if not actions:
        return 0

    # Unseen (state, action) pairs default to a Q-value of 0
    return max(self.q.get((state, action), 0) for action in actions)
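# update() is called during training but not shown in this section. A minimal
# sketch of a standard Q-learning update, assuming the class keeps a learning
# rate in self.alpha (that attribute name is an assumption; only the call
# signature comes from train() above).
def update(self, state, action, new_state, reward):
    # Q(s, a) <- Q(s, a) + alpha * ((reward + max_a' Q(s', a')) - Q(s, a))
    old = self.q.get((state, action), 0)
    future = self.best_future_reward(new_state)
    self.q[(state, action)] = old + self.alpha * (reward + future - old)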
def __init__(self, player=2, depth=4):
    self.player = player
    self.depth = depth
    # The opponent of the AI player
    self.other = Barasingga.other_player(self.player)
def terminal(self, board):
    # The game is over when game_over reports a result
    result, _ = Barasingga.game_over(board)
    return result is not None
def score(self, board):
    # Evaluate the board from the AI's perspective: its piece count minus
    # the opponent's, so the score works for either choice of self.player
    return (Barasingga.count_pieces(board, self.player)
            - Barasingga.count_pieces(board, self.other))
import sys
import pygame

pygame.init()

blue_dot = pygame.image.load('assets/blue50.png')
red_dot = pygame.image.load('assets/red50.png')

padding = 60
board_size = 600
scale = board_size // 4
line_width = 1

# Create the game window
size = (board_size + 2 * padding, board_size + 2 * padding)
screen = pygame.display.set_mode(size)
pygame.display.set_caption('Barasingga')

game = Barasingga()
bai = BarasinggaAI(depth=3)

# Empty list to store clicks
clicks = []

# Pygame event loop
run = True
while run:
    for event in pygame.event.get():
        # Check if the window was closed
        if event.type == pygame.QUIT:
            run = False
            sys.exit()
        # Collect mouse clicks
        if event.type == pygame.MOUSEBUTTONDOWN:
            clicks.append(event.pos)