Example 1
    def __init__(self, size=3):
        
        self._t = TTT(size)
        self._mode = 'optimal'
        self._penalty_prob = 0

        pass
Example 2
    def test_as_second_mover(self):

        parameters = {
            "ep_train": 0.5,
            "ep_infer": 0,
            "gamma": 1,
            "alpha": 1,
            "agent_for": 'minimizer',
        }
        q = TabularQ(3)
        q.set_params(**parameters)
        opponent_agent = load('minimax')
        q.train(numOfGames=500, opponent_agent=opponent_agent)

        t = TTT(3)

        Q = q._Q
        updated_state_indices = np.where(
            Q != [0, 0, 0, 0, 0, 0, 0, 0, 0])[0]  # [0] for row indices
        updated_state_indices = set(updated_state_indices)

        for i in updated_state_indices:
            state = q.get_state(i)
            mover = t.get_mover(state=state)
            self.assertEqual(mover, -1)

        return
Example 3
def minimax(state, mover: int, t: TTT) -> Tuple[Score, Move]:

    next_mover = -1 if mover == 1 else 1
    possible_moves = t.get_available_positions(state)
    corresponding_scores = []
    best_score = 0
    best_move = None

    for index in possible_moves:
        next_state = state.copy()
        next_state[index] = mover
        if t.is_terminated(next_state):
            score = t.get_score(next_state)
            corresponding_scores.append(score)
        else:
            [score, _] = minimax(next_state, next_mover, t)
            corresponding_scores.append(score)

    if mover == 1:
        best_score = max(corresponding_scores)
        best_move_index = corresponding_scores.index(best_score)
        best_move = possible_moves[best_move_index]
    elif mover == -1:
        best_score = min(corresponding_scores)
        best_move_index = corresponding_scores.index(best_score)
        best_move = possible_moves[best_move_index]

    return (best_score, best_move)
Example 4
def minimax_save(state, mover: int, t: TTT, table) -> Tuple[Score, Move]:

    encoded_state = encode_state(state)
    if encoded_state in table:
        return table[encoded_state]

    next_mover = -1 if mover == 1 else 1
    possible_moves = t.get_available_positions(state)
    corresponding_scores = []
    best_score = 0
    best_move = None

    for index in possible_moves:
        next_state = state.copy()
        next_state[index] = mover
        if t.is_terminated(next_state):
            score = t.get_score(next_state)
            corresponding_scores.append(score)
        else:
            [score, _] = minimax_save(next_state, next_mover, t, table)
            corresponding_scores.append(score)

    if mover == 1:
        best_score = max(corresponding_scores)
        best_move_index = corresponding_scores.index(best_score)
        best_move = possible_moves[best_move_index]
    elif mover == -1:
        best_score = min(corresponding_scores)
        best_move_index = corresponding_scores.index(best_score)
        best_move = possible_moves[best_move_index]

    table[encoded_state] = (best_score, best_move)
    return (best_score, best_move)
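
minimax_save keys its memo table with encode_state, which is not shown in these examples. A minimal sketch of such an encoder, assuming the state is a flat array over {-1, 0, 1} (the real project may encode differently):

def encode_state(state) -> str:
    # hypothetical: shift each cell from {-1, 0, 1} to {0, 1, 2} and join
    # into a string, giving a hashable, collision-free key per state
    return ''.join(str(int(v) + 1) for v in state)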
Example 5
    def test_row(self):
        t3 = TTT(3)
        s3 = np.array([0, 0, 0, -1, -1, 0, 1, 1, 1])
        self.assertEqual(t3.check_winner(s3), {
            'winner': 1,
            'lines': [[6, 7, 8]]
        })
        self.assertTrue(t3.is_terminated(s3))
Example 6
    def test_column(self):
        t4 = TTT(4)
        s4 = np.array([1, 0, 0, -1, 0, 1, 0, -1, 1, 0, 0, -1, 0, 1, 0, -1])
        self.assertEqual(t4.check_winner(s4), {
            'winner': -1,
            'lines': [[3, 7, 11, 15]]
        })
        self.assertTrue(t4.is_terminated(s4))
Example 7
    def test_score1(self):
        t3 = TTT(3)
        s = [[1, -1, 0], [-1, 1, 0], [0, 0, 1]]
        s = np.array(s).reshape(-1)
        terminated = t3.is_terminated(s)
        score = t3.get_score(s)
        self.assertTrue(terminated)
        self.assertEqual(score, 5)
Example 8
    def test_get_mover(self):
        t = TTT(3)
        s = [[0, 1, 0], [0, -1, 0], [0, 0, 0]]
        s = np.array(s)
        s = s.reshape(-1)

        mover = t.get_mover(state=s)
        self.assertTrue(mover == 1)
Example 9
def initialize_minimax(filepath: str, size=3):
    table = {}
    t = TTT(size)
    minimax_save(t.get_state(), t.get_mover(), t, table)
    with open(filepath, 'wb') as f:
        pickle.dump(table, f)

    return
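
The later examples call minimax_load, the counterpart of initialize_minimax; it is not shown here. A plausible sketch, assuming it unpickles the table written above and returns a lookup closure keyed by the same encode_state:

import pickle

def minimax_load(filepath: str):
    # hypothetical counterpart of initialize_minimax: load the pickled table
    # and return a function mapping a state to its stored (score, move)
    with open(filepath, 'rb') as f:
        table = pickle.load(f)

    def minimax(state):
        return table[encode_state(state)]

    return minimax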
Example 10
    def test_minimax_1(self):
        t = TTT(3)
        state = [[1, -1, 0], [-1, 1, 0], [0, 0, 0]]
        state = np.array(state, dtype='int')
        state = state.reshape(-1)
        t._state = state
        [score, move] = minimax(t.get_state(), 1, t)
        self.assertListEqual(list(state), list(t.get_state()))
        self.assertEqual(8, move)
        self.assertEqual(5, score)
Example 11
    def test_alpha_beta_1(self):
        t = TTT(3)
        player = ABPruning(3)
        state = [[1, -1, 0], [-1, 1, 0], [0, 0, 0]]
        state = np.array(state, dtype='int')
        state = state.reshape(-1)
        t._state = state
        [score, move] = player.get(t.get_state(), 1)
        self.assertListEqual(list(state), list(t.get_state()))
        self.assertEqual(8, move)
        self.assertEqual(5, score)
Example 12
    def test_result1(self):
        t3 = TTT(3)
        s = [[1, -1, -1], [-1, 1, 1], [1, -1, 1]]
        s = np.array(s).reshape(-1)
        result = t3.get_result(s)
        to_equal = {
            'terminated': True,
            'score': 1,
            'winner': 1,
            'lines': [[0, 4, 8]]
        }
        self.assertDictEqual(result, to_equal)
Example 13
    def test_set_state(self):
        t = TTT(3)
        state = [1, 0, 0, 1, 1, 0, -1, -1, 0]
        t.set_state(state)
        mover = t.get_mover()
        order = t._order
        num_of_moves = t._num_moves
        _state = np.array(state, dtype=int)
        self.assertEqual(mover, -1)
        self.assertEqual(order, False)
        self.assertEqual(num_of_moves, 5)
        self.assertTrue(np.array_equal(_state, t._state))
Example 14
def initialize_state_indices(filepath: str, size=3):
    table = {'current': 0}  # store state:index pair
    t = TTT(size)

    def dfs(state, mover: int, table=table) -> None:

        # store the state if it is a new one :
        encoded_state = t.get_encoded_state(state)
        if encoded_state not in table:
            table[encoded_state] = table['current']
            table['current'] += 1

        assert type(table[encoded_state]) is int

        next_mover = 1 if mover == -1 else -1
        available_moves = t.get_available_positions(state)
        for i in available_moves:
            next_state = state.copy()
            next_state[i] = mover
            if not t.is_terminated(next_state):
                dfs(next_state, next_mover)

        return

    # indexing start :
    initial_mover = t.get_mover()
    initial_state = t.get_state()
    print('indexing start :')
    dfs(initial_state, initial_mover)

    # simple validate :
    num_visited = table['current']
    del table['current']
    num_stored = len(table)
    print(f'visited states : {num_visited}')
    print(f'stored states : {num_stored}')
    assert num_stored == num_visited
    indices = set(table.values())
    assert len(indices) == len(table)
    sample_index = list(table.values())[1]
    assert type(sample_index) is int

    # save :
    print('saving... ', end='')
    with open(filepath, 'wb') as f:
        pickle.dump(table, f)
    print('done')

    return
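
A usage sketch (the file path here is illustrative, not the project's actual path): build the index table once, then load it back to map encoded states to Q-table row indices:

import pickle

initialize_state_indices('results/state_indices.pk', size=3)
with open('results/state_indices.pk', 'rb') as f:
    state_indices = pickle.load(f)  # encoded state -> unique int row index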
Example 15
    def test_update(self):
        t = TTT(3)
        prev_state = [[1, 1, 0], [-1, -1, 0], [0, 0, 0]]
        next_state = [[1, 1, 1], [-1, -1, 0], [0, 0, 0]]
        prev_state = np.array(prev_state).reshape(-1)
        next_state = np.array(next_state).reshape(-1)
        result = t.get_result(next_state)
        self.assertEqual(result, {'terminated': True, 'score': 5})

        q = TabularQ(3)
        q.set_params(alpha=1, gamma=1)

        encoded_prev_state = t.get_encoded_state(prev_state)
        prev_state_index = q.get_index(encoded_prev_state)
        encoded_next_state = t.get_encoded_state(next_state)
        next_state_index = q.get_index(encoded_next_state)
        self.assertEqual(next_state_index, None)

        q.update(encoded_prev_state, 2, encoded_next_state, 5)
        updated_row = q._Q[prev_state_index, :]

        check_row = np.array_equal(updated_row, [0, 0, 5, 0, 0, 0, 0, 0, 0])
        self.assertTrue(check_row)

        # test correct inference :
        q._is_first_mover = True
        possible_moves = t.get_available_positions(prev_state)
        inferred = q.infer(encoded_prev_state, possible_moves, 1)
        self.assertEqual(inferred, 2)

        pass
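
The test above pins down the update rule's behavior. A minimal sketch of a standard tabular Q-learning update consistent with it (hypothetical: TabularQ.update's actual internals are not shown in these examples, and the function name q_update is illustrative):

import numpy as np

def q_update(Q: np.ndarray, state_index: int, action: int, reward: float,
             next_row=None, alpha: float = 1.0, gamma: float = 1.0) -> None:
    # Q(s,a) <- Q(s,a) + alpha * (reward + gamma * bootstrap - Q(s,a)).
    # With alpha = gamma = 1 and a terminal (or unseen) next state the
    # bootstrap term is 0, so Q(s,a) collapses to the reward: 5 above.
    # NOTE: on a minimizer's turn the bootstrap would use next_row.min().
    bootstrap = 0.0 if next_row is None else float(next_row.max())
    Q[state_index, action] += alpha * (reward + gamma * bootstrap - Q[state_index, action])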
Example 16
    def __init__(self, user_first: bool, size=3, *args, **kwargs):

        super().__init__(*args, **kwargs)

        # state variables
        self._user_first = user_first
        self._t = TTT(size)
        self._agent: Callable[[np.ndarray], int]
        self._num_of_moves = 0
        self._state_history = [self._t.get_state()]

        # UI accessors
        self._history_scale: tk.Scale
        self._player_labels: Dict[int, tk.Label]  # key : 1,2
        self._buttons = []

        # UI initialization
        self.title('TTT')
        self._make_top_frame()
        self._make_board(size)
        self._make_bottom_frame(size)
        return
Example 17
    def run_game(self, agent1, agent2, size=3):

        t = TTT(size)
        for i in range(size * size):
            agent = agent1 if i % 2 == 0 else agent2
            inferred = agent(t.get_state())
            t.put(inferred)
            if t.is_terminated():
                break

        return t.get_result()
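
A pairing sketch using the load factory from a later example (hypothetical test method; run_game is defined above as a method, so it is called on its enclosing test object):

    def test_minimax_vs_random(self):
        agent1 = load('minimax')
        agent2 = load('random', size=3)
        result = self.run_game(agent1, agent2, size=3)
        self.assertTrue(result['terminated'])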
Example 18
    def test_diagonal2(self):
        t3 = TTT(3)
        s3 = np.array([1, 0, -1, 1, -1, 0, -1, 1, 0])
        self.assertEqual(t3.check_winner(s3), {
            'winner': -1,
            'lines': [[2, 4, 6]]
        })
        self.assertTrue(t3.is_terminated(s3))

        t4 = TTT(4)
        s4 = np.array([-1, 0, 0, 1, -1, 0, 1, 0, -1, 1, 0, 0, 1, 0, 0, 0])
        self.assertTrue(t4.is_terminated(s4))
        self.assertEqual(t4.check_winner(s4), {
            'winner': 1,
            'lines': [[3, 6, 9, 12]]
        })
Example 19
    def test_diagonal1(self):
        t3 = TTT(3)
        s3 = np.array([1, 0, -1, 0, 1, -1, 0, 0, 1])
        self.assertTrue(t3.is_terminated(s3))
        self.assertEqual(t3.check_winner(s3), {
            'winner': 1,
            'lines': [[0, 4, 8]]
        })

        t4 = TTT(4)
        s4 = np.array([-1, 0, 0, 1, 0, -1, 0, 1, 0, 1, -1, 0, 1, 0, 0, -1])
        self.assertTrue(t4.is_terminated(s4))
        self.assertEqual(t4.check_winner(s4), {
            'winner': -1,
            'lines': [[0, 5, 10, 15]]
        })
Example 20
    def test_alphabeta_vs_alphabeta(self):

        t = TTT(3)
        player = ABPruning(3)
        moves = 0
        print('Moves : 0 ', end='')
        while True:
            [_, best_move] = player.get(t.get_state(), t.get_mover())
            t.put(best_move)
            moves += 1
            print(f'{moves} ', end='')
            if t.is_terminated():
                break
            pass

        print('final state')
        print(t)
        self.assertEqual(t.check_winner()['winner'], 0)
Example 21
    def test_minimax_vs_minimax(self):

        size = 3
        t = TTT(size)
        filepath = 'results/minimax.pk'
        minimax_loaded = minimax_load(filepath)
        moves = 0
        while True:
            [_, best_move] = minimax_loaded(t.get_state())
            t.put(best_move)
            moves += 1
            if t.is_terminated():
                break
            pass

        self.assertEqual(t.check_winner()['winner'], 0)
        pass
Example 22
    def _train_both(self, numOfGames):
        for _ in tqdm(range(numOfGames)):
            game = TTT(self._size)
            self._is_first_mover = True

            # one complete game :
            while True:
                encoded_prev_state = game.get_encoded_state()

                possible_moves = game.get_available_positions()
                selected_move = self._epsilon_greedy_train(encoded_prev_state, possible_moves)
                game.put(selected_move)

                encoded_next_state = game.get_encoded_state()
                result = game.get_result()
                self.update(encoded_prev_state, selected_move, encoded_next_state, result['score'])
                if result['terminated']:
                    break
                pass

            pass
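
_epsilon_greedy_train is called here but not shown in these examples. A minimal epsilon-greedy sketch (hypothetical: the attribute name _ep_train is assumed; _Q and get_index appear in the other examples):

import random

    def _epsilon_greedy_train(self, encoded_state, possible_moves):
        # explore: with probability ep_train pick a uniformly random legal move
        if random.random() < self._ep_train:
            return random.choice(possible_moves)
        # exploit: pick the legal move with the best Q-value for this state
        # (a minimizer agent would use min instead of max; unseen states
        # would need extra handling, omitted in this sketch)
        row = self._Q[self.get_index(encoded_state)]
        return max(possible_moves, key=lambda m: row[m])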
Example 23
    def test_deterministic_vs_minimax(self):
        # gamma = alpha = 1 guarantees that for end states s and optimal moves a,
        # Q(s,a) = R(s,a) IF Q(s,a) IS NOT 0.
        # Here, R(s,a) is the score of the terminated state
        parameters = {
            "ep_train": 0.5,
            "ep_infer": 0,
            "gamma": 1,
            "alpha": 1,
            "agent_for": 'both',
        }
        q = TabularQ(3)
        q.set_params(**parameters)
        q.train(numOfGames=500)

        s = Settings()
        minimax = minimax_load(s.path('minimax'))
        t = TTT(3)

        Q = q._Q
        to_check_state_indices = np.where(Q != [0, 0, 0, 0, 0, 0, 0, 0, 0])[0]
        to_check_state_indices = set(map(int, to_check_state_indices))  # dedupe row indices

        for state_index in to_check_state_indices:

            self.assertFalse(
                np.array_equal(Q[state_index],
                               np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])))
            state = q.get_state(state_index)
            encoded_state = t.get_encoded_state(state)
            mover = t.get_mover(state=state)
            possible_moves = t.get_available_positions(state)

            if mover == 1:
                best_move_q = np.argmax(Q[state_index])
                if int(Q[state_index, best_move_q]) != 0:
                    move_inferred = q.infer(encoded_state, possible_moves,
                                            mover)
                    q_value_1 = Q[state_index, best_move_q]
                    q_value_2 = Q[state_index, move_inferred]
                    self.assertEqual(q_value_1, q_value_2)
            elif mover == -1:
                best_move_q = np.argmin(Q[state_index])
                if int(Q[state_index, best_move_q]) != 0:
                    move_inferred = q.infer(encoded_state, possible_moves,
                                            mover)
                    q_value_1 = Q[state_index, best_move_q]
                    q_value_2 = Q[state_index, move_inferred]
                    self.assertEqual(q_value_1, q_value_2)

            next_state = state.copy()
            next_state[best_move_q] = mover

            result = t.get_result(next_state)
            if result['terminated']:
                best_score, _ = minimax(state)
                q_value = Q[state_index, best_move_q]
                if best_score != q_value:
                    # not yet sampled (s,a)
                    # or draw case
                    self.assertEqual(q_value, 0)
                else:
                    # sampled (s,a)
                    self.assertEqual(best_score, q_value)
            pass
Example 24
class GameWindow(tk.Toplevel):
    """Game UI"""
    def __init__(self, user_first: bool, size=3, *args, **kwargs):

        super().__init__(*args, **kwargs)

        # state variables
        self._user_first = user_first
        self._t = TTT(size)
        self._agent: Callable[[np.ndarray], int]
        self._num_of_moves = 0
        self._state_history = [self._t.get_state()]

        # UI accessors
        self._history_scale: tk.Scale
        self._player_labels: Dict[int, tk.Label]  # key : 1,2
        self._buttons = []

        # UI initialization
        self.title('TTT')
        self._make_top_frame()
        self._make_board(size)
        self._make_bottom_frame(size)
        return

    #region Public Methods
    def set_agent(self, agent: Callable[[np.ndarray], int], name: str) -> None:

        self._agent = agent
        return

    def get_result(self) -> dict:

        return self._t.get_result()

    #endregion

    #region Put UI Components
    def _make_top_frame(self):

        frame = tk.Frame(self)
        if self._user_first:
            text1 = 'O : User'
            text2 = 'X : AI'
        else:
            text1 = 'O : AI'
            text2 = 'X : User'
        label1 = tk.Label(frame, text=text1)
        label2 = tk.Label(frame, text=text2)
        label1.pack()
        label2.pack()
        frame.pack()
        return

    def _make_board(self, size):

        board = tk.Frame(self)
        buttons = self._buttons
        num_of_buttons = size * size
        for i in range(num_of_buttons):
            b = tk.Button(board,
                          width=3,
                          height=1,
                          font=('Helvetica', 30),
                          activebackground='white',
                          command=lambda num=i: self._on_click_board(num))
            buttons.append(b)
            b.grid(column=i % size, row=int(i / size))
            pass

        board.pack()
        return

    def _make_bottom_frame(self, size):

        frame = tk.Frame(self)

        history_scale = tk.Scale(frame,
                                 command=self._on_scale_move,
                                 orient='horizontal',
                                 from_=0,
                                 to=0)
        history_scale.grid(row=0, columnspan=2)
        self._history_scale = history_scale

        restart_button = tk.Button(frame,
                                   text="Restart",
                                   command=self._on_click_reset)
        exit_button = tk.Button(frame, text="Exit", command=self.destroy)
        restart_button.grid(row=1, column=0)
        exit_button.grid(row=1, column=1)

        frame.pack()

        return

    #endregion

    #region Event Handlers
    def _on_click_board(self, position: int):

        state_num = int(self._history_scale.get())
        is_rewinded = not (self._num_of_moves == state_num)
        if is_rewinded:
            # reset the game to the rewinded one :
            state_to_force = self._state_history[state_num]
            self._t.set_state(state_to_force)
            self._num_of_moves = self._t._num_moves
            self._state_history = self._state_history[0:(self._num_of_moves +
                                                         1)]
            pass

        self._t.put(position)
        current_state = self._t.get_state()
        self._state_history.append(current_state)
        self._num_of_moves += 1
        self._history_scale.configure(to=self._num_of_moves)
        self._history_scale.set(self._num_of_moves)
        """
        [issue] If this procedure is called by button.invoke()
        then it doesn't invoke the scale's command _on_scale_move.
        So call it manually (and hence, called twice in user's turn) :
        """
        self._on_scale_move(self._num_of_moves)

        return

    def _on_scale_move(self, state_num):

        state_num = int(state_num)
        first_mover_turn = state_num % 2 == 0
        user_turn = first_mover_turn == self._user_first

        self._set_board(state_num, user_turn)

        if self.get_result()['terminated']:
            return

        if state_num == len(self._state_history) - 1:
            if user_turn:
                pass
            else:
                if hasattr(self, '_agent'):
                    self._on_agent_turn(state_num)
                pass
        else:
            # : agent's turn but it's a previous state
            pass

        return

    def _on_click_reset(self):

        self._num_of_moves = 0
        self._state_history = self._state_history[0:1]
        self._t.set_state(self._state_history[0])
        self._history_scale.configure(to=0)
        self._history_scale.set(0)
        self._set_board(0, self._user_first)

        return

    #endregion

    #region Private Methods
    def _on_agent_turn(self, state_num: int):

        # TODO : async progress bar
        state = self._state_history[state_num]
        move = self._agent(state)
        button = self._buttons[move]
        button.configure(state='normal')
        button.invoke()

        return

    def _set_board(self, state_num: int, user_turn: bool):
        """Modify board UI"""

        to_state = self._state_history[state_num]
        result = self._t.get_result(to_state)
        terminated = result['terminated']
        lines = result['lines']
        lines = sum(lines, [])  # flattening
        for p in range(len(to_state)):
            move = int(to_state[p])
            of_line = p in lines
            self._modify_button(p, move, user_turn, terminated, of_line)
        return

    def _modify_button(self,
                       button_position: int,
                       mover: int,
                       move_allowed: bool,
                       terminated=False,
                       of_line=False):

        button = self._buttons[button_position]

        args = {'disabledforeground': 'black', 'state': 'disabled'}
        if mover == 1:
            args['text'] = '○'
            args['state'] = 'disabled'
        elif mover == -1:
            args['text'] = '×'
            args['state'] = 'disabled'
        else:
            args['text'] = ' '
            if move_allowed:
                args['state'] = 'normal'
            elif not hasattr(self, '_agent'):
                args['state'] = 'normal'

        if terminated:
            args['state'] = 'disabled'
            if of_line:
                if mover == 1:
                    args['disabledforeground'] = 'steelblue'
                elif mover == -1:
                    args['disabledforeground'] = 'tomato'

        button.config(**args)

        return
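
A launch sketch for GameWindow (hypothetical wiring: a hidden Tk root plus the load factory shown in a later example):

import tkinter as tk

root = tk.Tk()
root.withdraw()  # GameWindow is a Toplevel; hide the empty root window
window = GameWindow(user_first=True, size=3)
window.set_agent(load('minimax'), name='minimax')
root.mainloop()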
Example 25
    def _train_against(self, opponent_agent: Callable[[np.ndarray], int], numOfGames: int) -> None:

        agent_q_turn = self._is_first_mover
        for _ in tqdm(range(numOfGames)):
            game = TTT(self._size)
            turn = True

            # one complete game :
            # the previous state and the action taken come from the agent's turn;
            # the next state comes from the opponent's turn.
            # The update happens during the opponent's turn.
            encoded_prev_state = None
            move_taken = None
            encoded_next_state = None
            while True:

                if turn == agent_q_turn:
                    # Q turn :
                    if game.is_terminated():
                        break
                    else:
                        possible_moves = game.get_available_positions()
                        encoded_prev_state = game.get_encoded_state()
                        move_taken = self._epsilon_greedy_train(encoded_prev_state, possible_moves)
                        game.put(move_taken)
                        pass
                    pass
                else:
                    # opponent's turn :
                    if not game.is_terminated():
                        state = game.get_state()
                        # the move below is treated as random (a sampling procedure) :
                        move = opponent_agent(state)
                        game.put(move)
                        pass
                    encoded_next_state = game.get_encoded_state()
                    score = game.get_score()
                    if encoded_prev_state is not None:
                        # : skip the update right after the first move (when Q is the second mover)
                        self.update(encoded_prev_state, move_taken, encoded_next_state, score)
                    
                    pass
                
                turn = not turn
            pass
        
        return None
Example 26
class ABPruning:

    def __init__(self, size=3):
        
        self._t = TTT(size)
        self._mode = 'optimal'
        self._penalty_prob = 0

        pass

    def set_penalty(self, penalty_prob=0):

        assert isinstance(penalty_prob, (int, float))
        assert 0 <= penalty_prob <= 1
        
        if penalty_prob > 0:
            self._mode = 'modified'
            self._penalty_prob = penalty_prob
        else:
            pass

        return

    def get(self, state: np.ndarray, mover: int) -> Tuple[Score, Move]:

        if self._mode == 'optimal':
            return self._optimal(state, mover)
        elif self._mode == 'modified':
            return self._modified(state, mover)

    def _optimal(self, state, mover: int, alpha=-1000, beta=1000) -> Tuple[Score, Move]:

        t = self._t
        next_mover = -1 if mover == 1 else 1
        possible_moves = t.get_available_positions(state)
        best_move = None
        best_score = None

        # maximizer :
        if mover == 1:
            best_score = -1000
            for i in possible_moves:
                next_state = state.copy()
                next_state[i] = mover
                if t.is_terminated(next_state):
                    score = t.get_score(next_state)
                else:
                    [score, _] = self._optimal(next_state, next_mover, alpha, beta)
                
                if score > best_score:
                    best_score = score
                    best_move = i
                    
                    alpha = best_score
                    if alpha >= beta:
                        break
        
        # minimizer :
        elif mover == -1:
            best_score = 1000
            for i in possible_moves:
                next_state = state.copy()
                next_state[i] = mover
                if t.is_terminated(next_state):
                    score = t.get_score(next_state)
                else:
                    [score, _] = self._optimal(next_state, next_mover, alpha, beta)
                
                if score < best_score:
                    best_score = score
                    best_move = i

                    beta = best_score
                    if alpha >= beta:
                        break


        return (best_score, best_move)

    def _modified(self, state, mover: int, alpha=-1000, beta=1000) -> Tuple[Score, Move]:

        t = self._t
        next_mover = -1 if mover == 1 else 1
        possible_moves = self._get_reduced_moves(state)
        best_move = None
        best_score = None

        # maximizer :
        if mover == 1:
            best_score = -1000
            for i in possible_moves:
                next_state = state.copy()
                next_state[i] = mover
                if t.is_terminated(next_state):
                    score = t.get_score(next_state)
                else:
                    [score, _] = self._modified(next_state, next_mover, alpha, beta)
                
                if score > best_score:
                    best_score = score
                    best_move = i
                    
                    alpha = best_score
                    if alpha >= beta:
                        break
        
        # minimizer :
        elif mover == -1:
            best_score = 1000
            for i in possible_moves:
                next_state = state.copy()
                next_state[i] = mover
                if t.is_terminated(next_state):
                    score = t.get_score(next_state)
                else:
                    [score, _] = self._modified(next_state, next_mover, alpha, beta)
                
                if score < best_score:
                    best_score = score
                    best_move = i

                    beta = best_score
                    if alpha >= beta:
                        break


        return (best_score, best_move)

    def _get_reduced_moves(self, state) -> list:

        all_moves = self._t.get_available_positions(state)
        if len(all_moves) == 0:
            return []
            
        p = 1 - self._penalty_prob
        num_of_moves = int(len(all_moves) * p)
        if num_of_moves == 0:
            num_of_moves = 1

        sample_moves = random.sample(all_moves, num_of_moves)

        return sample_moves
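
A brief usage sketch: ABPruning plays optimally by default, and set_penalty switches it to the weakened 'modified' search that samples a reduced move set at each node:

t = TTT(3)
player = ABPruning(3)
player.set_penalty(0.2)  # drop roughly 20% of the available moves per node
score, move = player.get(t.get_state(), t.get_mover())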
Example 27
def load(name: str, **kwargs) -> Callable[[np.ndarray], int]:

    if name == 'minimax':

        from src.utils.path import Settings
        s = Settings()
        minimax = minimax_load(s.path('minimax'))

        def agent(state: np.ndarray) -> int:
            move = minimax(state)[1]
            return int(move)

        return agent

    elif name == 'alpha_beta':

        assert 'size' in kwargs
        assert 'penalty_prob' in kwargs
        size = kwargs.get('size')
        penalty_prob = kwargs.get('penalty_prob')
        ab = ABPruning(size)
        ab.set_penalty(penalty_prob)
        t = TTT(size)

        def agent(state: np.ndarray) -> int:
            mover = t.get_mover(state=state)
            inferred = ab.get(state=state, mover=mover)[1]
            return inferred

        return agent

    elif name == 'random':

        assert 'size' in kwargs
        import random
        size = kwargs.get('size')
        t = TTT(size)

        def agent(state: np.ndarray) -> int:
            possible_moves = t.get_available_positions(state)
            nums = len(possible_moves)
            random_index = random.randint(0, nums - 1)
            return int(possible_moves[random_index])

        return agent

    elif name == 'tabular_q':

        assert 'id' in kwargs
        id = kwargs.get('id')
        q = TabularQ.load(id)
        size = q._size
        t = TTT(size)

        def agent(state: np.ndarray) -> int:
            possible_moves = t.get_available_positions(state)
            encoded_state = t.get_encoded_state(state)
            mover = t.get_mover(state=state)
            inferred = q.infer(encoded_state, possible_moves, mover)
            return inferred

        return agent

    else:
        raise NameError(f'{name} is not implemented')
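
A usage sketch for the factory, wiring agents as the branches above assume (the penalty value is illustrative):

minimax_agent = load('minimax')
random_agent = load('random', size=3)
ab_agent = load('alpha_beta', size=3, penalty_prob=0.1)

t = TTT(3)
move = minimax_agent(t.get_state())  # an int board position
t.put(move)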
Example 28
    def test_state_encode(self):
        t3 = TTT(3)
        encoded01 = t3.get_encoded_state()
        state02 = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int)
        encoded02 = t3.get_encoded_state(state02)
        self.assertEqual(encoded01, encoded02)
Example 29
    def test_game1(self):
        #  0 1 0
        # -1 1 0
        # -1 1 0
        t = TTT(3)
        result = t.get_result()
        self.assertDictEqual(result, {'terminated': False, 'score': 0})

        t.put(1)
        result = t.get_result()
        self.assertDictEqual(result, {'terminated': False, 'score': 0})

        t.put(3)
        result = t.get_result()
        self.assertDictEqual(result, {'terminated': False, 'score': 0})

        t.put(4)
        result = t.get_result()
        self.assertDictEqual(result, {'terminated': False, 'score': 0})

        t.put(6)
        result = t.get_result()
        self.assertDictEqual(result, {'terminated': False, 'score': 0})

        t.put(7)
        result = t.get_result()
        self.assertDictEqual(result, {'terminated': True, 'score': 5})

        return
Example 30
    def test_availables(self):
        t3 = TTT(3)
        s3 = [[1, -1, 0], [0, 1, -1], [1, -1, 0]]
        s3 = np.array(s3).reshape(-1)
        indices = t3.get_available_positions(s3)
        self.assertListEqual(indices, [2, 3, 8])