Beispiel #1
0
    def minimax(self, board, maximize=True, depth=4):
        """
        Depth-limited minimax search starting from `board`.

        `maximize` tells whose turn is being simulated: when True the
        moves of `self.player` are explored and the highest score is
        kept; when False the moves of `self.other` are explored and the
        lowest score is kept. The two sides alternate on every ply.

        `depth` is the number of plies still to be searched. When it
        reaches 0, or when `self.terminal(board)` is true, the board is
        evaluated with `self.score(board)`.

        Returns a tuple `(best_action, best_score)`. `best_action` is
        `None` when no move was explored (terminal board, exhausted
        depth, or no available action for the side to move).
        """
        if self.terminal(board) or depth == 0:
            return (None, self.score(board))

        mover = self.player if maximize else self.other
        all_actions = Barasingga.available_actions(board, mover)
        if not all_actions:
            # The side to move is stuck although the board is not reported
            # as terminal: fall back to the static evaluation instead of
            # letting random.choice() fail on an empty sequence.
            return (None, self.score(board))

        # Random fallback so that an action is still returned when no
        # child score strictly improves on the initial +/- infinity.
        best_action = random.choice(all_actions)
        best_score = -math.inf if maximize else math.inf

        for action in all_actions:
            new = Barasingga.result(board, action)
            # The turn passes to the opposite side on the next ply.
            score = self.minimax(
                new, maximize=not maximize, depth=depth - 1
            )[1]
            improved = score > best_score if maximize else score < best_score
            if improved:
                best_score = score
                best_action = action

        return (best_action, best_score)
Beispiel #2
0
    def best_future_reward(self, state):
        """
        Return the largest Q-value among all actions available in `state`.

        `state` is a `(tuple_board, player)` pair. Every
        `(state, action)` pair that has no entry in `self.q` counts as
        having a Q-value of 0. When no action is available in `state`,
        the result is 0.
        """
        board_as_tuple, mover = state
        candidates = Barasingga.available_actions(
            self.list_board(board_as_tuple), mover
        )

        if len(candidates) > 0:
            # Missing entries default to 0, exactly like known zero values.
            return max(
                self.q[(state, move)] if (state, move) in self.q else 0
                for move in candidates
            )

        return 0
Beispiel #3
0
    def choose_action(self, state, epsilon=True):
        """
        Given a state `state`, return one of the actions available in it.

        `state` is a `(tuple_board, player)` pair. The best action is
        the one with the highest Q-value, using 0 for `(state, action)`
        pairs that have no entry in `self.q`. If several actions share
        the highest Q-value, one of them is picked uniformly at random.

        If `epsilon` is `False`, the best action is always returned.

        If `epsilon` is `True` and more than one action is available,
        the best action is returned with probability 0.9; otherwise one
        of the remaining actions is picked uniformly at random.

        NOTE(review): the exploration rate is fixed at 10% here and
        `self.epsilon` is not consulted -- confirm whether it should be.

        Raises `IndexError` if no action is available in `state`.
        """
        explore_rate = 0.1

        tuple_board, player = state
        board = self.list_board(tuple_board)
        actions = Barasingga.available_actions(board, player)
        if len(actions) == 0:
            raise IndexError("no available actions in state")

        # Unseen pairs are worth 0, so an untried action must beat any
        # action already known to have a negative Q-value.
        q_values = [
            self.q[(state, action)] if (state, action) in self.q else 0
            for action in actions
        ]
        top_q = max(q_values)
        best_action = random.choice(
            [action for action, q in zip(actions, q_values) if q == top_q]
        )

        if epsilon and len(actions) > 1 and random.random() < explore_rate:
            # Work on a copy: the list handed out by the game must not be
            # modified as a side effect of picking a move.
            others = list(actions)
            others.remove(best_action)
            return random.choice(others)

        return best_action
Beispiel #4
0
        time.sleep(0.1)
        game.move(m)

    if len(clicks) == 2:
        initial_mouse = clicks[0]
        final_mouse = clicks[1]
        initial = None
        final = None
        for i in range(5):
            for j in range(5):
                if pieces[j][i].collidepoint(initial_mouse):
                    initial = (i, j)
                if pieces[j][i].collidepoint(final_mouse):
                    final = (i, j)
        action = (initial, final)
        if action in game.available_actions(game.board, game.player):
            game.move(action)
        clicks = []

    # drawing the pieces
    pieces = []
    for i in range(5):
        row = []
        for j in range(5):
            center = (padding + i * scale, padding + j * scale)
            rect = pygame.Rect(padding + i * scale - 25,
                               padding + j * scale - 25, 50, 50)
            row.append(rect)
            if game.board[j][i] == 1:
                screen.blit(blue_dot, rect)
            elif game.board[j][i] == 2: