Beispiel #1
0
    def test_alphabeta_vs_penalty(self):
        """Pit a plain alpha-beta player against one with a 0.2 penalty.

        The plain player moves as ``1`` and the penalised one as ``-1``.
        Eleven games are tallied by final score, the tally is printed, and
        the test passes only if no game finished with a score of -4 or -2.
        """
        board = TTT(3)
        plain_player = ABPruning(3)
        penalised_player = ABPruning(3)
        penalised_player.set_penalty(0.2)
        players = {1: plain_player, -1: penalised_player}

        # One counter per score value this match-up is expected to produce.
        scores = dict.fromkeys((-4, -2, 0, 1, 3, 5), 0)
        finished = 0

        while finished < 11:
            if board.is_terminated():
                # Book the result, then fall through and keep playing on a
                # fresh board.
                scores[board.get_score()] += 1
                finished += 1
                board = TTT(3)

            mover = board.get_mover()
            if mover in players:
                _, move = players[mover].get(board.get_state(), mover)
                board.put(move)

        print(scores)
        self.assertTrue(scores[-4] + scores[-2] == 0)
Beispiel #2
0
 def test_minimax_1(self):
     """Minimax must pick the winning corner and leave the board untouched.

     Position under test (flattened row by row)::

          1 -1  0
         -1  1  0
          0  0  0

     Player ``1`` is to move; the expected answer is move 8 with score 5.
     """
     t = TTT(3)
     state = np.array([[1, -1, 0], [-1, 1, 0], [0, 0, 0]], dtype='int')
     state = state.reshape(-1)
     t._state = state
     # `state` and `t._state` are the same array, so comparing them after
     # the search would always succeed. Keep an independent snapshot so the
     # "board not mutated" assertion can actually fail.
     expected_state = state.copy()
     [score, move] = minimax(t.get_state(), 1, t)
     self.assertListEqual(list(expected_state), list(t.get_state()))
     self.assertEqual(8, move)
     self.assertEqual(5, score)
Beispiel #3
0
 def test_alpha_beta_1(self):
     """Alpha-beta must pick the winning corner and leave the board untouched.

     Position under test (flattened row by row)::

          1 -1  0
         -1  1  0
          0  0  0

     Player ``1`` is to move; the expected answer is move 8 with score 5.
     """
     t = TTT(3)
     player = ABPruning(3)
     state = np.array([[1, -1, 0], [-1, 1, 0], [0, 0, 0]], dtype='int')
     state = state.reshape(-1)
     t._state = state
     # `state` and `t._state` are the same array, so comparing them after
     # the search would always succeed. Keep an independent snapshot so the
     # "board not mutated" assertion can actually fail.
     expected_state = state.copy()
     [score, move] = player.get(t.get_state(), 1)
     self.assertListEqual(list(expected_state), list(t.get_state()))
     self.assertEqual(8, move)
     self.assertEqual(5, score)
Beispiel #4
0
def initialize_minimax(filepath: str, size=3):
    """Run ``minimax_save`` from the opening position and pickle its table.

    A fresh ``TTT(size)`` supplies the starting state and mover. The dict
    handed to ``minimax_save`` as its last argument is then written to
    ``filepath`` in binary mode with ``pickle``.

    Args:
        filepath: Destination of the pickled dict.
        size: Board size forwarded to ``TTT``.
    """
    game = TTT(size)
    lookup = {}
    minimax_save(game.get_state(), game.get_mover(), game, lookup)
    with open(filepath, 'wb') as out:
        pickle.dump(lookup, out)
Beispiel #5
0
    def run_game(self, agent1, agent2, size=3):
        """Play one game between two agents and return the board's result.

        ``agent1`` moves on even turns (starting with the first) and
        ``agent2`` on odd turns. Each agent is called with the current state
        and whatever it returns is passed to ``put``. At most ``size * size``
        moves are made; play stops as soon as the game reports termination.
        """
        game = TTT(size)
        agents = (agent1, agent2)
        for turn in range(size * size):
            chosen = agents[turn % 2](game.get_state())
            game.put(chosen)
            if game.is_terminated():
                break

        return game.get_result()
Beispiel #6
0
    def test_penalty_vs_penalty(self):
        """Two 0.7-penalty players should eventually produce every outcome.

        Games are replayed until the set of observed scores contains at
        least one value from each of the win, loss and draw groups below.
        The loop has no other exit.
        """
        board = TTT(3)
        first = ABPruning(3)
        first.set_penalty(0.7)
        second = ABPruning(3)
        second.set_penalty(0.7)
        players = {1: first, -1: second}

        games_played = 1
        scores = set()
        outcome_groups = (
            {1, 2, 3, 4},   # scores counted as a win
            {-1, -2, -3},   # scores counted as a loss
            {0},            # draw
        )

        while True:
            if board.is_terminated():
                scores.add(board.get_score())

                # Done once every outcome group has been hit at least once.
                if all(group & scores for group in outcome_groups):
                    break
                board = TTT(3)
                games_played += 1

            mover = board.get_mover()
            if mover in players:
                _, move = players[mover].get(board.get_state(), mover)
                board.put(move)

        self.assertTrue(len(scores) > 2)
Beispiel #7
0
def initialize_state_indices(filepath: str, size=3):
    """Index every reachable non-terminal position and pickle the table.

    Starting from a fresh ``TTT(size)``, positions are explored depth-first.
    Each encoded state receives the next free integer (0, 1, 2, ...) in order
    of first visit. Terminal positions are never stored. The resulting
    ``{encoded_state: index}`` dict is sanity-checked and written to
    ``filepath`` with ``pickle``.

    Args:
        filepath: Destination of the pickled dict.
        size: Board size forwarded to ``TTT``.
    """
    t = TTT(size)
    table = {}       # encoded state -> index
    num_visited = 0  # kept apart from `table` so no bookkeeping key lives in it

    def dfs(state, mover: int) -> None:
        nonlocal num_visited

        encoded_state = t.get_encoded_state(state)
        if encoded_state in table:
            # The first visit already walked this position's whole subtree,
            # so nothing below it can still be missing from the table.
            # NOTE(review): relies on equal encodings denoting the same
            # position - confirm against get_encoded_state.
            return
        table[encoded_state] = num_visited
        num_visited += 1

        # Compare by value: `is -1` only worked through int caching and
        # would be False for e.g. a numpy integer.
        next_mover = 1 if mover == -1 else -1
        for position in t.get_available_positions(state):
            next_state = state.copy()
            next_state[position] = mover
            if not t.is_terminated(next_state):
                dfs(next_state, next_mover)

    # indexing start :
    print('indexing start :')
    dfs(t.get_state(), t.get_mover())

    # simple validate :
    num_stored = len(table)
    print(f'visited states : {num_visited}')
    print(f'stored states : {num_stored}')
    assert num_stored == num_visited
    # Indices must be unique ...
    assert len(set(table.values())) == num_stored
    # ... and plain ints. Check them all rather than one sample, which also
    # avoids indexing past the end when only a single state was stored.
    assert all(isinstance(index, int) for index in table.values())

    # save :
    print('saving... ', end='')
    with open(filepath, 'wb') as f:
        pickle.dump(table, f)
    print('done')

    return
Beispiel #8
0
    def test_alphabeta_vs_minimax(self):
        """Alpha-beta (moving as 1) against the stored minimax player (-1).

        The running move count and the final board are printed; the game is
        expected to finish with ``winner == 0``.
        """
        game = TTT(3)
        minimax_player = minimax_load('results/minimax.pk')
        alphabeta_player = ABPruning(3)

        moves = 0
        print('Moves : 0 ', end='')
        while True:
            mover = game.get_mover()
            if mover == 1:
                _, best_move = alphabeta_player.get(game.get_state(), mover)
            elif mover == -1:
                _, best_move = minimax_player(game.get_state())
            game.put(best_move)
            moves += 1
            print(f'{moves} ', end='')
            if game.is_terminated():
                break

        print('final state')
        print(game)
        self.assertEqual(game.check_winner()['winner'], 0)
Beispiel #9
0
    def _train_against(self,opponent_agent:Callable[[np.ndarray],int],numOfGames:int)->None:
        """Play ``numOfGames`` games against ``opponent_agent``, updating after each reply.

        Within a game the two sides alternate, starting with the first
        mover. On its own turn the learner picks a move with
        ``_epsilon_greedy_train`` and remembers the encoded state it moved
        from. On the opponent's turn the opponent moves (unless the game is
        already over), and then ``update`` is called with the remembered
        state and move, the encoded state after the reply, and the current
        score. A game ends when the learner's turn arrives on a terminated
        board.
        """
        learner_turn_flag = self._is_first_mover
        for _ in tqdm(range(numOfGames)):
            game = TTT(self._size)
            first_mover_to_play = True

            # What the learner last saw and did. These stay None until its
            # first move, which matters when the opponent opens the game.
            state_before = None
            action = None

            while True:
                if first_mover_to_play is learner_turn_flag:
                    # Learner's turn.
                    if game.is_terminated():
                        break
                    candidates = game.get_available_positions()
                    state_before = game.get_encoded_state()
                    action = self._epsilon_greedy_train(state_before,candidates)
                    game.put(action)
                else:
                    # Opponent's turn: its reply is treated as part of the
                    # environment (a sampled transition).
                    if not game.is_terminated():
                        reply = opponent_agent(game.get_state())
                        game.put(reply)
                    state_after = game.get_encoded_state()
                    reward = game.get_score()
                    if state_before is not None:
                        self.update(state_before,action,state_after,reward)

                first_mover_to_play = not first_mover_to_play

        return None
Beispiel #10
0
    def test_minimax_vs_minimax(self):
        """The stored minimax player playing both sides must end with winner 0."""
        board_size = 3
        game = TTT(board_size)
        player = minimax_load('results/minimax.pk')

        moves = 0
        while True:
            _, best_move = player(game.get_state())
            game.put(best_move)
            moves += 1
            if game.is_terminated():
                break

        outcome = game.check_winner()
        self.assertEqual(outcome['winner'], 0)
Beispiel #11
0
    def test_alphabeta_vs_alphabeta(self):
        """One alpha-beta player playing both sides must end with winner 0.

        The running move count and the final board are printed along the way.
        """
        game = TTT(3)
        searcher = ABPruning(3)

        moves = 0
        print('Moves : 0 ', end='')
        while True:
            _, best_move = searcher.get(game.get_state(), game.get_mover())
            game.put(best_move)
            moves += 1
            print(f'{moves} ', end='')
            if game.is_terminated():
                break

        print('final state')
        print(game)
        outcome = game.check_winner()
        self.assertEqual(outcome['winner'], 0)
Beispiel #12
0
class GameWindow(tk.Toplevel):
    """Top-level window for playing one TTT game, optionally against an agent.

    The window holds a ``TTT`` instance, a grid of ``size * size`` buttons,
    a horizontal scale for stepping back through earlier positions, and
    Restart / Exit buttons. An agent can be attached with ``set_agent``;
    while none is attached, empty cells stay clickable on every turn.
    """
    def __init__(self, user_first: bool, size=3, *args, **kwargs):
        """Create the game state and build the widgets.

        Args:
            user_first: Whether the human makes the first move.
            size: Board size passed to ``TTT``; the board has
                ``size * size`` buttons.
            *args, **kwargs: Forwarded to ``tk.Toplevel``.
        """

        super().__init__(*args, **kwargs)

        # state variables
        self._user_first = user_first
        self._t = TTT(size)
        # Declared only; the attribute exists after set_agent() is called.
        # Other methods test for it with hasattr().
        self._agent: Callable[[np.ndarray], int]
        self._num_of_moves = 0
        # _state_history[k] is the board after k moves.
        self._state_history = [self._t.get_state()]

        # UI accessors
        self._history_scale: tk.Scale
        # Declared only; nothing in this class assigns it.
        self._player_labels: Dict[int, tk.Label]  # key : 1,2
        # Board buttons, indexed by board position.
        self._buttons = []

        # UI initialization
        self.title(f'TTT')
        self._make_top_frame()
        self._make_board(size)
        self._make_bottom_frame(size)
        return

    #region Public Methods
    def set_agent(self, agent: Callable[[np.ndarray], int], name: str) -> None:
        """Attach the agent that plays the non-user side.

        Args:
            agent: Callable taking a board state and returning a position.
            name: Accepted but not used by this method.
        """

        self._agent = agent
        return

    def get_result(self) -> dict:
        """Return ``get_result()`` of the underlying game."""

        return self._t.get_result()

    #endregion

    #region Put UI Components
    def _make_top_frame(self):
        """Build the two labels stating which mark belongs to User and AI."""

        frame = tk.Frame(self)
        if self._user_first:
            text1 = 'O : User'
            text2 = 'X : AI'
        else:
            text1 = 'O : AI'
            text2 = 'X : User'
        label1 = tk.Label(frame, text=text1)
        label2 = tk.Label(frame, text=text2)
        label1.pack()
        label2.pack()
        frame.pack()
        return

    def _make_board(self, size):
        """Create ``size * size`` buttons in a grid and store them in ``_buttons``."""

        board = tk.Frame(self)
        buttons = self._buttons
        num_of_buttons = size * size
        for i in range(num_of_buttons):
            # `num=i` freezes the position at definition time, so each
            # button reports its own index rather than the loop's last value.
            b = tk.Button(board,
                          width=3,
                          height=1,
                          font=('Helvetica', 30),
                          activebackground='white',
                          command=lambda num=i: self._on_click_board(num))
            buttons.append(b)
            # Row-major layout: position i sits at row i // size, column i % size.
            b.grid(column=i % size, row=int(i / size))
            pass

        board.pack()
        return

    def _make_bottom_frame(self, size):
        """Build the history scale and the Restart / Exit buttons.

        ``size`` is not used by this method.
        """

        frame = tk.Frame(self)

        # The scale starts with range 0..0; _on_click_board widens it as
        # moves are played.
        history_scale = tk.Scale(frame,
                                 command=self._on_scale_move,
                                 orient='horizontal',
                                 from_=0,
                                 to=0)
        history_scale.grid(row=0, columnspan=2)
        self._history_scale = history_scale

        restart_button = tk.Button(frame,
                                   text="Restart",
                                   command=self._on_click_reset)
        exit_button = tk.Button(frame, text="Exit", command=self.destroy)
        restart_button.grid(row=1, column=0)
        exit_button.grid(row=1, column=1)

        frame.pack()

        return

    #endregion

    #region Event Handlers
    def _on_click_board(self, position: int):
        """Play ``position`` on the board and advance the history.

        If the scale currently shows an earlier position, the game is first
        reset to that position and the later history is discarded. The new
        state is then appended, the scale is extended and moved to the end,
        and ``_on_scale_move`` is called directly.
        """

        state_num = int(self._history_scale.get())
        is_rewinded = not (self._num_of_moves == state_num)
        if is_rewinded:
            # reset the game to the rewinded one :
            state_to_force = self._state_history[state_num]
            self._t.set_state(state_to_force)
            # Take the move count from the game after set_state, then drop
            # every history entry beyond it.
            self._num_of_moves = self._t._num_moves
            self._state_history = self._state_history[0:(self._num_of_moves +
                                                         1)]
            pass

        self._t.put(position)
        current_state = self._t.get_state()
        self._state_history.append(current_state)
        self._num_of_moves += 1
        self._history_scale.configure(to=self._num_of_moves)
        self._history_scale.set(self._num_of_moves)
        # The bare string below is the original author's note, kept as-is.
        """
        [issue] If this procedure is called by button.invoke()
        then it doesn't invoke the scale's command _on_scale_move.
        So call it manually (and hence, called twice in user's turn) :
        """
        self._on_scale_move(self._num_of_moves)

        return

    def _on_scale_move(self, state_num):
        """Redraw the board for ``state_num`` and let the agent move if due.

        Registered as the history scale's ``command`` and also called
        directly from ``_on_click_board``. The agent is asked to move only
        when the game is not terminated, ``state_num`` is the latest
        position, it is not the user's turn, and an agent has been set.

        Args:
            state_num: Index into ``_state_history``; converted with ``int``.
        """

        state_num = int(state_num)
        # Even move counts mean the first mover is to play.
        first_mover_turn = True if state_num % 2 == 0 else False
        user_turn = first_mover_turn == self._user_first

        self._set_board(state_num, user_turn)

        # This checks the live game, not the position being displayed.
        if self.get_result()['terminated']:
            return

        if state_num == len(self._state_history) - 1:
            if user_turn:
                pass
            else:
                # No agent attached yet: nothing to trigger.
                if hasattr(self, '_agent'):
                    self._on_agent_turn(state_num)
                pass
        else:
            # : agent's turn but it's a previous state
            pass

        return

    def _on_click_reset(self):
        """Return to the initial position and clear the history and scale."""

        self._num_of_moves = 0
        self._state_history = self._state_history[0:1]
        self._t.set_state(self._state_history[0])
        self._history_scale.configure(to=0)
        self._history_scale.set(0)
        # At move 0 the user may move exactly when the user is first mover.
        self._set_board(0, self._user_first == True)

        return

    #endregion

    #region Private Methods
    def _on_agent_turn(self, state_num: int):
        """Ask the agent for a move on ``state_num`` and click that button."""

        # TODO : async progress bar
        state = self._state_history[state_num]
        move = self._agent(state)
        button = self._buttons[move]
        # _modify_button may have left this button disabled (it is not the
        # user's turn), so it is set to 'normal' before invoke(). invoke()
        # runs the button's command, i.e. _on_click_board(move).
        button.configure(state='normal')
        button.invoke()

        return

    def _set_board(self, state_num: int, user_turn: bool):
        """Redraw every board button to show ``_state_history[state_num]``.

        Args:
            state_num: Index of the position to display.
            user_turn: Passed to ``_modify_button`` as ``move_allowed``.
        """

        to_state = self._state_history[state_num]
        result = self._t.get_result(to_state)
        terminated = result['terminated']
        lines = result['lines']
        # result['lines'] is a list of lists of positions; merge them into
        # one flat list for the membership test below.
        lines = sum(lines, [])  # flattening
        for p in range(len(to_state)):
            move = int(to_state[p])
            of_line = p in lines
            self._modify_button(p, move, user_turn, terminated, of_line)
        return

    def _modify_button(self,
                       button_position: int,
                       mover: int,
                       move_allowed: bool,
                       terminated=False,
                       of_line=False):
        """Set the text, state and colour of one board button.

        Args:
            button_position: Index into ``_buttons``.
            mover: Cell content; 1 draws '○', -1 draws '×', anything else
                is treated as empty.
            move_allowed: Whether an empty cell should be clickable.
            terminated: If true the button is always disabled.
            of_line: If true (and ``terminated``), the mark is recoloured.
                Presumably marks a cell on a completed line; confirm
                against ``TTT.get_result``.
        """

        button = self._buttons[button_position]

        # Default: disabled with black text.
        args = {'disabledforeground': 'black', 'state': 'disabled'}
        if mover == 1:
            args['text'] = '○'
            args['state'] = 'disabled'
        elif mover == -1:
            args['text'] = '×'
            args['state'] = 'disabled'
        else:
            args['text'] = ' '
            if move_allowed:
                args['state'] = 'normal'
            elif not hasattr(self, '_agent'):
                # No agent attached: empty cells stay clickable on both turns.
                args['state'] = 'normal'

        if terminated:
            args['state'] = 'disabled'
            if of_line:
                if mover == 1:
                    args['disabledforeground'] = 'steelblue'
                elif mover == -1:
                    args['disabledforeground'] = 'tomato'

        button.config(**args)

        return