Example #1
    def get_input(self, board: Game, piece_type):
        self.load_dict()
        print(board.n_move)
        if board.count_player_stones(piece_type) <= 0:  # our first move: take the center if legal
            self.side = piece_type
            self.opponent = 1 if self.side == 2 else 2
            if board.is_position_valid(2, 2, self.side, True):
                copy_board = copy.deepcopy(board)
                copy_board.next_board(2, 2, self.side, True)
                print("Minimax: piece_type = {}".format(self.side), \
                      "current board value = {}".format(self.total_score(copy_board, self.side)))
                return 2, 2
        if board.is_game_finished():
            return
        else:
            # score, action = self._max(board)
            DEPTH = 3
            if board.n_move > 16:
                # Late game: deepen the search toward the end of the game
                # (the move count is capped at 24 on this board).
                DEPTH = 24 - board.n_move
            action = self.alpha_beta_cutoff_search(board, DEPTH)
            copy_board = copy.deepcopy(board)
            if action != "PASS":
                print(action)
                copy_board.next_board(action[0], action[1], self.side, True)
            print("Minimax: piece_type = {}".format(self.side), \
                  "current board value = {}".format(self.total_score(copy_board, self.side)))

            self.save_dict()
            return action  # board.move(action[0], action[1], self.side)
Example #2
def battle(player1, player2, total_games, show_result=False):
    p1_stats = [0, 0, 0]  # draw, win, lose
    p2_stats = [0, 0, 0]
    timer = time.time()
    game_number = 0
    batch = 100
    # Progress report roughly every 1% of the run; max(1, ...) avoids a
    # ZeroDivisionError when total_games < 100.
    report_every = max(1, total_games // 100)
    for i in range(total_games):
        go = Game(GAME_SIZE)
        go.verbose = show_result
        go.new_board()
        if game_number % report_every == 0:
            print('number of iterations = {}'.format(i))
            print('time = {}'.format(time.time() - timer))
            timer = time.time()
        # One game with player1 moving first, then swap colors so both
        # players see an equal number of games on each side.
        p1_stats, p2_stats = play_learn_track(go, game_number, player1,
                                              player2, p1_stats, p2_stats,
                                              batch)
        game_number += 1
        go = Game(GAME_SIZE)
        go.verbose = show_result
        go.new_board()
        p1_stats, p2_stats = play_learn_track(go, game_number, player2,
                                              player1, p1_stats, p2_stats,
                                              batch)
        game_number += 1
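A hypothetical driver for battle, assuming the Minimax and RandomPlayer classes from the other examples on this page and a module-level GAME_SIZE; the pairing below is illustrative, not from the original code:

if __name__ == "__main__":
    # battle() alternates colors internally, so one call exercises both sides.
    battle(Minimax(), RandomPlayer(), total_games=100, show_result=False)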
Example #3
    def get_input(self, board: Game, piece_type):
        if board.count_player_stones(piece_type) <= 0:
            self.side = piece_type
            self.opponent = 1 if self.side == 2 else 2
            if board.is_position_valid(2, 2, self.side, True):
                return 2, 2
        if board.is_game_finished():
            return
        else:
            # score, action = self._max(board)
            action = self.alpha_beta_cutoff_search(board, 3)
            return action  # board.move(action[0], action[1], self.side)
Example #4
    def get_input(self, board: Game, piece_type):
        if board.count_player_stones(piece_type) <= 0:
            self.side = piece_type
            self.opponent = 1 if self.side == 2 else 2
            if board.is_position_valid(2, 2, self.side, True):
                copy_board = copy.deepcopy(board)
                copy_board.place_chess(2, 2, self.side, True)
                # print("Minimax_old: piece_type = {}".format(self.side),
                #       "current board value = {}".format(self.total_score(copy_board, self.side)))
                return 2, 2
        if board.is_game_finished():
            return
        else:
            # score, action = self._max(board)
            action = self.alpha_beta_cutoff_search(board, DEPTH)  # DEPTH: module-level constant
            if action != "PASS":
                copy_board = copy.deepcopy(board)
                copy_board.place_chess(action[0], action[1], self.side, True)
            # print("Minimax_old: piece_type = {}".format(self.side),
            #       "current board value = {}".format(self.total_score(copy_board, self.side)))
            return action  # board.move(action[0], action[1], self.side)
Example #5
    def get_input(self, go: Game, piece_type):
        if self.identity is None or self.identity != piece_type:
            self.identity = piece_type
        else:
            # Called again with the same color: assume a new game and reset.
            self.__init__(piece_type)
        self.load_dict()
        # print(board.n_move)
        go.visualize_board()
        if go.count_player_stones(piece_type) <= 0:
            self.identity = piece_type
            self.opponent = 1 if self.identity == 2 else 2
            self.cache = {}
            open("cache.txt", "w").close()  # truncate the on-disk cache at the start of a game
            if go.is_position_valid(2, 2, self.identity, True):
                copy_board = go.make_copy()
                copy_board.next_board(2, 2, self.identity, True)
                # print("Minimax: piece_type = {}".format(self.side), \
                #       "current board value = {}".format(self.total_score(copy_board, self.side)))
                return 2, 2
        if go.is_game_finished():
            return
        else:
            # score, action = self._max(board)
            depth = DEPTH
            action = self.alpha_beta_adaptive_agent(go, depth)
            copy_board = go.make_copy()
            if action != "PASS":
                # print(action)
                copy_board.next_board(action[0], action[1], self.identity,
                                      True)
            # print("Minimax: piece_type = {}".format(self.side), \
            #       "current board value = {}".format(self.total_score(copy_board, self.side)))

            self.save_dict()
            return action  # board.move(action[0], action[1], self.side)
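Examples #1 and #5 bracket the search with load_dict and save_dict so the transposition cache survives between process invocations. A minimal sketch of what those helpers could look like, assuming the cache maps state strings to scores and lives in the cache.txt file the snippets truncate at game start; the serialization format and the CacheMixin name are assumptions, not shown on this page:

import json

class CacheMixin:
    def load_dict(self):
        # Fall back to an empty cache when the file is missing or unreadable.
        try:
            with open("cache.txt") as f:
                self.cache = json.load(f)
        except (OSError, ValueError):
            self.cache = {}

    def save_dict(self):
        with open("cache.txt", "w") as f:
            json.dump(self.cache, f)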
Example #6
from mygame import Game

game = Game()
game.start()
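Every snippet on this page programs against the same Game class. A sketch of the interface they collectively rely on, reconstructed from the calls made above and below (signatures and return conventions are inferences, not the real implementation; the older variant in Example #4 uses place_chess where the others use next_board):

class Game:
    def __init__(self, n=5):
        self.size = n          # board dimension (5x5 in these examples)
        self.n_move = 0        # half-moves played so far
        self.verbose = False
        self.new_game = True

    def new_board(self): ...                                     # reset to an empty board
    def set_board(self, piece_type, previous_board, board): ...  # load a position
    def is_position_valid(self, i, j, piece_type, test_check): ...
    def next_board(self, i, j, piece_type, in_place): ...        # apply a move
    def is_game_finished(self): ...
    def count_player_stones(self, piece_type): ...
    def make_copy(self): ...                                     # deep copy of the state
    def state_string(self): ...                                  # hashable cache key
    def play(self, player1, player2, verbose): ...               # 0 draw, 1 X wins, 2 O wins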
Example #7
        count_black = 0
        count_white = 2.5  # White starts with komi compensation
        for i in range(self.size):
            for j in range(self.size):
                if board[i][j] == 1:
                    count_black += 1
                elif board[i][j] == 2:
                    count_white += 1

        if piece_type == 1:
            diff = count_black - count_white
        else:
            diff = count_white - count_black
        return diff


if __name__ == "__main__":
    N = 5
    go_game = Game(N)
    game_piece_type, previous_board, current_board, go_game.n_move = go_game.read_input()
    go_game.set_board(game_piece_type, previous_board, current_board)
    player = Minimax()
    if go_game.new_game:
        player.cache = {}
        open("cache.txt", "w").close()
    player.side = game_piece_type
    next_action = player.get_input(go_game, game_piece_type)
    go_game.n_move += 2  # our move plus the opponent's reply before the next call

    go_game.write_output(next_action)
Example #8
def testMinimax():
    # qlearner = Q_learning_agent()
    random_player = RandomPlayer()
    minimax = Minimax()
    # qlearner.fight()
    # player1: Player instance, always plays X
    # player2: Player instance, always plays O
    p1_stats = [0, 0, 0]
    p2_stats = [0, 0, 0]
    player1 = minimax
    player2 = random_player
    for i in range(int(TEST_GAMES)):
        go = Game(GAME_SIZE)
        go.verbose = False
        go.new_board()
        result = go.play(player1, player2, False)
        p1_stats[result] += 1
    for i in range(int(TEST_GAMES)):
        go = Game(GAME_SIZE)
        go.verbose = False
        go.new_board()
        result = go.play(player2, player1, False)
        p2_stats[result] += 1

    print(p1_stats, p2_stats)
    p1_stats = [round(x / TEST_GAMES * 100.0, 1) for x in p1_stats]
    p2_stats = [round(x / TEST_GAMES * 100.0, 1) for x in p2_stats]
    # Redirect the remaining prints so the summary is appended to the results file.
    sys.stdout = open("Minimax_results.txt", "a")
    print('_' * 60)
    print('{:>15}(X) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player1.__class__.__name__, p1_stats[1], p1_stats[0],
        p1_stats[2]).center(50))
    print('{:>15}(O) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player2.__class__.__name__, p1_stats[2], p1_stats[0],
        p1_stats[1]).center(50))
    print('_' * 60)
    print()
    print('_' * 60)
    print('{:>15}(X) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player2.__class__.__name__, p2_stats[1], p2_stats[0],
        p2_stats[2]).center(50))
    print('{:>15}(O) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player1.__class__.__name__, p2_stats[2], p2_stats[0],
        p2_stats[1]).center(50))
    print('_' * 60)
    print()
Example #9
def testQlearner(dict_num):
    qlearner = Q_learning_agent()
    random_player = RandomPlayer()
    qlearner.fight(dict_num)
    if dict_num > 0:
        qlearner.load_dict(dict_num)
    # player1: Player instance, always plays X
    # player2: Player instance, always plays O
    p1_stats = [0, 0, 0]
    p2_stats = [0, 0, 0]
    player1 = qlearner
    player2 = random_player
    for i in range(int(TEST_GAMES)):
        go = Game(GAME_SIZE)
        go.verbose = False
        go.new_board()
        result = go.play(player1, player2, False)
        p1_stats[result] += 1
    for i in range(int(TEST_GAMES)):
        go = Game(GAME_SIZE)
        go.verbose = False
        go.new_board()
        result = go.play(player2, player1, False)
        p2_stats[result] += 1

    print(p1_stats, p2_stats)
    p1_stats = [round(x / TEST_GAMES * 100.0, 1) for x in p1_stats]
    p2_stats = [round(x / TEST_GAMES * 100.0, 1) for x in p2_stats]
    print('_' * 60)
    print('{:>15}(X) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player1.__class__.__name__, p1_stats[1], p1_stats[0],
        p1_stats[2]).center(50))
    print('{:>15}(O) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player2.__class__.__name__, p1_stats[2], p1_stats[0],
        p1_stats[1]).center(50))
    print('_' * 60)
    print()
    print('_' * 60)
    print('{:>15}(X) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player2.__class__.__name__, p2_stats[1], p2_stats[0],
        p2_stats[2]).center(50))
    print('{:>15}(O) | Wins:{}% Draws:{}% Losses:{}%'.format(
        player1.__class__.__name__, p2_stats[2], p2_stats[0],
        p2_stats[1]).center(50))
    print('_' * 60)
    print()
Example #10
    def alpha_beta_adaptive_agent(self, go: Game, depth=4):
        def max_value(board, alpha, beta, depth):
            if depth == 0 or board.is_game_finished():
                state = board.state_string()
                if state in self.cache:
                    return self.cache[state]
                return self.total_score(board, self.identity)
            v_max = -numpy.inf
            candidates = []
            for i in range(board.size):
                for j in range(board.size):
                    if board.is_position_valid(i,
                                               j,
                                               self.identity,
                                               test_check=True):
                        candidates.append((i, j))
            random.shuffle(candidates)
            if not candidates:
                # No legal move for us: pass, the opponent moves on the same board.
                v_max = max(v_max, min_value(board, alpha, beta, depth - 1))
                if v_max >= beta:  # beta cutoff
                    return v_max
                alpha = max(alpha, v_max)
            else:
                for i, j in candidates:
                    poss_max_board = board.make_copy()
                    poss_max_board.next_board(i, j, self.identity, False)
                    poss_max_board.n_move += 1
                    v_max = max(
                        v_max, min_value(poss_max_board, alpha, beta,
                                         depth - 1))
                    # v_max is always numeric here; cache the best value so far.
                    state = board.state_string()
                    self.cache[state] = v_max
                    if v_max >= beta:
                        return v_max
                    alpha = max(alpha, v_max)
            return v_max

        def min_value(board, alpha, beta, depth):
            if depth == 0 or board.is_game_finished():
                state = board.state_string()
                if state in self.cache:
                    return self.cache[state]
                return self.total_score(board, self.identity)
            v_min = numpy.inf
            candidates = []
            for i in range(board.size):
                for j in range(board.size):
                    if board.is_position_valid(i,
                                               j,
                                               self.opponent,
                                               test_check=True):
                        candidates.append((i, j))
            random.shuffle(candidates)
            if not candidates:
                # The opponent has no legal move: they pass and we move again.
                v_min = min(v_min, max_value(board, alpha, beta, depth - 1))
                if v_min <= alpha:  # alpha cutoff
                    return v_min
                beta = min(beta, v_min)
            else:
                for i, j in candidates:
                    poss_min_board = board.make_copy()
                    valid = poss_min_board.next_board(i, j, self.opponent,
                                                      True)
                    poss_min_board.n_move += 1
                    if not valid:
                        raise ValueError("in min invalid move")
                    v_min = min(
                        v_min, max_value(poss_min_board, alpha, beta,
                                         depth - 1))
                    # v_min is always numeric here; cache the best value so far.
                    state = board.state_string()
                    self.cache[state] = v_min
                    if v_min <= alpha:
                        return v_min
                    beta = min(beta, v_min)
            return v_min

        best_score = -numpy.inf
        beta = numpy.inf
        best_action = None
        candidates = []
        for i in range(go.size):
            for j in range(go.size):
                if go.is_position_valid(i, j, self.identity, test_check=True):
                    candidates.append((i, j))
        random.shuffle(candidates)
        # Adaptive depth: stay shallow in the opening, deepen once few moves remain.
        if go.n_move < 6:
            depth = 0
        elif go.n_move < 10:
            depth = 2
        elif len(candidates) < 6:  # few legal moves left: search the remainder exhaustively
            depth = len(candidates)
        if not candidates:
            best_action = "PASS"
        else:
            for i, j in candidates:
                possible_board = go.make_copy()
                possible_board.next_board(i, j, self.identity, True)
                possible_board.n_move += 1
                value = min_value(possible_board, best_score, beta, depth)
                if value > best_score:
                    best_score = value
                    best_action = (i, j)
        return best_action
Example #11
            else:
                # Equivalent earlier formulation:
                # base_state_action_q[move] = base_state_action_q[move] * (1 - self.alpha) \
                #     + self.alpha * self.gamma * max_q_value
                # Temporal-difference update: Q(s,a) += alpha * (gamma * max_q - Q(s,a))
                base_state_action_q[move] = base_state_action_q[move] \
                    + self.alpha * (self.gamma * max_q_value - base_state_action_q[move])
            max_q_value = max(base_state_action_q.values())
        # Every 1% of LEARN_GAMES: decay the exploration/learning rates and checkpoint.
        if num_game % int(self.LEARN_GAMES / 100) == 0:
            self.update_epsilon()
            self.update_alpha()
            if self.file_count == 5:  # rotate through five checkpoint slots
                self.file_count = 0
            self.save_dict(self.file_count)
            self.save_policy(self.file_count)
            self.file_count += 1
        self.states_to_update = []


if __name__ == "__main__":
    N = 5
    # readInput and writeOutput are I/O helpers defined elsewhere.
    game_piece_type, previous_board, board = readInput(N)
    go_game = Game(N)
    go_game.set_board(game_piece_type, previous_board, board)
    player = Q_learning_agent()
    player.identity = game_piece_type
    player.fight()
    next_action = player.get_input(go_game, game_piece_type)
    writeOutput(next_action)
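The else branch near the top of this example applies the standard temporal-difference backup, propagating the discounted best successor value into each visited state-action pair. As a standalone helper (a sketch; the function and parameter names are illustrative, not from the original agent):

def td_backward_update(q_values, visited, alpha, gamma, terminal_value):
    # q_values: dict mapping state -> {move: q}, as in the snippet above.
    # visited: (state, move) pairs from the finished game, most recent first;
    # terminal_value seeds the backup in place of a successor's max Q.
    max_q_value = terminal_value
    for state, move in visited:
        q = q_values[state][move]
        # Q(s, a) <- Q(s, a) + alpha * (gamma * max_a' Q(s', a') - Q(s, a))
        q_values[state][move] = q + alpha * (gamma * max_q_value - q)
        max_q_value = max(q_values[state].values())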