예제 #1
    def minimax(self, board, maximize=True, depth=4):
        """
        Pick a move for `board` with depth-limited minimax.

        `maximize` is True when the side to move is `self.player` and
        False when it is `self.other`; `depth` is the number of plies
        that may still be explored below this position.

        Returns a tuple `(best_action, best_score)`. `best_action` is
        `None` when the position is terminal, the depth budget is used
        up, or the side to move has no available action; in those cases
        `best_score` is simply `self.score(board)`.
        """
        if self.terminal(board) or depth == 0:
            return (None, self.score(board))

        # The maximiser moves for self.player, the minimiser for self.other.
        mover = self.player if maximize else self.other
        all_actions = list(Barasingga.available_actions(board, mover))
        if not all_actions:
            # Nothing to expand: fall back to the static evaluation instead
            # of crashing in random.choice on an empty sequence.
            return (None, self.score(board))

        best_score = -math.inf if maximize else math.inf
        # Fallback only; it is replaced as soon as any child beats the bound.
        best_action = random.choice(all_actions)

        for action in all_actions:
            new = Barasingga.result(board, action)
            # The turn passes to the opponent, so the role flips at each ply.
            score = self.minimax(
                new, maximize=not maximize, depth=depth - 1
            )[1]
            if maximize:
                improved = score > best_score
            else:
                improved = score < best_score
            if improved:
                best_score = score
                best_action = action

        return (best_action, best_score)
예제 #2
def train(n):
    """
    Train a Q-learning agent by letting it play `n` games against itself.

    A single `BarasinggaQlearning` instance chooses the moves for both
    sides and has its Q-values updated after every move.

    Returns the trained `BarasinggaQlearning` instance.
    """
    player = BarasinggaQlearning()

    for i in range(n):
        print(f"training {i+1}")

        game = Barasingga()

        # Keep track of last move made by either player
        last = {
            1: {"state": None, "action": None},
            2: {"state": None, "action": None},
        }

        # Game loop
        while True:

            # Keep track of current state and action
            state = (tuple_board(game.board), game.player)
            action = player.choose_action(state)

            # Keep track of last state and action
            last[game.player]["state"] = state
            last[game.player]["action"] = action

            # Make move
            # NOTE(review): assumes Barasingga exposes `move(action)` which
            # applies the action and updates `board`, `player`, `winner`
            # and `over` -- confirm against the game class.
            game.move(action)
            new_state = (tuple_board(game.board), game.player)

            # When game is over (a winner exists or `game.over` is set),
            # update Q values with rewards and end this training game.
            # NOTE(review): the reward values (-1 for the move just made,
            # 1 for the last move recorded for `game.player`) are kept
            # exactly as they were; confirm they match the win condition.
            if game.winner is not None or game.over:
                player.update(state, action, new_state, -1)
                player.update(
                    last[game.player]["state"],
                    last[game.player]["action"],
                    new_state,
                    1,
                )
                break

            # If game is continuing, no rewards yet
            elif last[game.player]["state"] is not None:
                player.update(
                    last[game.player]["state"],
                    last[game.player]["action"],
                    new_state,
                    0,
                )

    print("Done training")

    # Return the trained AI
    return player
예제 #3
    def choose_action(self, state, epsilon=True):
        """
        Given a state `state`, return an action `(i, j)` to take.

        `state` is a `(tuple_board, player)` pair.

        If `epsilon` is `False`, then return the best action
        available in the state (the one with the highest Q-value,
        using 0 for pairs that have no Q-values).

        If `epsilon` is `True`, then with probability
        `self.epsilon` choose a random available action,
        otherwise choose the best action available. When the
        instance has no `epsilon` attribute, 0.1 is used, which is
        the rate the previous hard-coded 9:1 weighting produced.

        If multiple actions have the same Q-value, any of those
        options is an acceptable return value.

        Raises `IndexError` if no action is available in `state`.
        """
        board_tuple, player = state
        board = self.list_board(board_tuple)
        actions = list(Barasingga.available_actions(board, player))
        if not actions:
            # Same exception type random.choice raised here before.
            raise IndexError("no available actions in this state")

        # Explore: occasionally pick uniformly among all available actions.
        explore_prob = getattr(self, "epsilon", 0.1)
        if epsilon and len(actions) > 1 and random.random() < explore_prob:
            return random.choice(actions)

        # Exploit: unseen (state, action) pairs count as Q = 0, so they can
        # beat actions already known to be bad (negative Q).
        q_values = []
        for action in actions:
            key = (state, action)
            q_values.append(self.q[key] if key in self.q else 0)

        best_q = max(q_values)
        best_actions = [
            action for action, q in zip(actions, q_values) if q == best_q
        ]
        # Break ties at random; any of the tied actions is acceptable.
        return random.choice(best_actions)
예제 #4
    def best_future_reward(self, state):
        """
        Given a state `state`, consider all possible `(state, action)`
        pairs available in that state and return the maximum of all
        of their Q-values.

        `state` is a `(tuple_board, player)` pair.

        Use 0 as the Q-value if a `(state, action)` pair has no
        Q-value in `self.q`. If there are no available actions in
        `state`, return 0.
        """
        board_tuple, player = state
        board = self.list_board(board_tuple)
        actions = Barasingga.available_actions(board, player)
        if len(actions) == 0:
            return 0

        max_q_value = -math.inf
        for action in actions:
            key = (state, action)
            # Pairs that were never updated default to 0; known pairs keep
            # their stored value instead of being overwritten.
            q_value = self.q[key] if key in self.q else 0
            if q_value > max_q_value:
                max_q_value = q_value

        return max_q_value
예제 #5
 def __init__(self, player=2, depth=4):
     """
     Record which side this AI plays and its `depth` setting.

     `self.other` is set to whatever `Barasingga.other_player`
     returns for `self.player`.
     """
     self.player, self.depth = player, depth
     self.other = Barasingga.other_player(self.player)
예제 #6
 def terminal(self, board):
     """
     Return True when `Barasingga.game_over(board)` reports a result
     (its first returned value is not None), otherwise False.
     """
     outcome, _winner = Barasingga.game_over(board)
     return outcome is not None
예제 #7
 def score(self, board):
     """
     Evaluate `board` from this AI's point of view.

     Returns the piece count of `self.player` minus the piece count
     of `self.other`, so a larger value is better for the side this
     AI plays. With the default `player=2` this is the same value
     as the previous hard-coded "player 2 minus player 1".
     """
     own = Barasingga.count_pieces(board, self.player)
     opponent = Barasingga.count_pieces(board, self.other)
     return own - opponent
예제 #8
# Images loaded from the assets directory.
# NOTE(review): presumably the sprites for the two players' pieces -- confirm
# against the drawing code.
blue_dot = pygame.image.load('assets/blue50.png')
red_dot = pygame.image.load('assets/red50.png')

# Layout constants used to size the window below.
padding = 60
board_size = 600
# A quarter of the board side (150).
# NOTE(review): presumably the spacing between grid lines -- confirm.
scale = int(board_size / 4)
line_width = 1

# Create the window: the board plus `padding` on every side.
size = (board_size + 2 * padding, board_size + 2 * padding)
screen = pygame.display.set_mode(size)

# Game state and the AI opponent, constructed with depth=3.
game = Barasingga()
bai = BarasinggaAI(depth=3)
# Empty list to store clicks
clicks = []

# pygame window
run = True
while run:

    # Check if game quit
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            run = False
        # collect mouse clicks
        if event.type == pygame.MOUSEBUTTONDOWN: