def test_alphabeta_vs_penalty(self):
    """Play 11 games of plain alpha-beta (mover 1) against penalised alpha-beta (mover -1).

    Final scores are tallied per value and printed. The test passes when no
    game finished with a score of -4 or -2.
    """
    board = TTT(3)
    plain_player = ABPruning(3)
    penalised_player = ABPruning(3)
    penalised_player.set_penalty(0.2)

    # One counter per expected final score.
    scores = dict.fromkeys((-4, -2, 0, 1, 3, 5), 0)
    finished_games = 0
    while finished_games < 11:
        if board.is_terminated():
            scores[board.get_score()] += 1
            finished_games += 1
            board = TTT(3)

        mover = board.get_mover()
        if mover == 1:
            current_player = plain_player
        elif mover == -1:
            current_player = penalised_player
        else:
            current_player = None

        if current_player is not None:
            _, move = current_player.get(board.get_state(), mover)
            board.put(move)

    print(scores)
    self.assertTrue(scores[-4] + scores[-2] == 0)
def test_minimax_1(self):
    """Minimax on a fixed position must pick move 8 with score 5 and leave the board untouched."""
    board = TTT(3)
    flat_state = np.array(
        [[1, -1, 0],
         [-1, 1, 0],
         [0, 0, 0]],
        dtype='int',
    ).reshape(-1)
    board._state = flat_state

    score, move = minimax(board.get_state(), 1, board)

    # The search must not leave any trace on the game's state.
    self.assertListEqual(list(flat_state), list(board.get_state()))
    self.assertEqual(8, move)
    self.assertEqual(5, score)
def test_alpha_beta_1(self):
    """Alpha-beta on a fixed position must pick move 8 with score 5 and leave the board untouched."""
    board = TTT(3)
    searcher = ABPruning(3)
    flat_state = np.array(
        [[1, -1, 0],
         [-1, 1, 0],
         [0, 0, 0]],
        dtype='int',
    ).reshape(-1)
    board._state = flat_state

    score, move = searcher.get(board.get_state(), 1)

    # The search must not leave any trace on the game's state.
    self.assertListEqual(list(flat_state), list(board.get_state()))
    self.assertEqual(8, move)
    self.assertEqual(5, score)
def initialize_minimax(filepath: str, size=3):
    """Fill a table via ``minimax_save`` from a fresh game and pickle it to ``filepath``.

    Args:
        filepath: Destination of the pickled table (opened in binary write mode).
        size: Passed to ``TTT`` when creating the starting game.
    """
    results = {}
    game = TTT(size)
    # minimax_save receives the dict and is expected to populate it in place.
    minimax_save(game.get_state(), game.get_mover(), game, results)
    with open(filepath, 'wb') as out_file:
        pickle.dump(results, out_file)
def run_game(self, agent1, agent2, size=3):
    """Let two agents alternate moves on a fresh game and return ``TTT.get_result()``.

    Args:
        agent1: Callable taking the current state and returning a move; moves on even turns.
        agent2: Same contract as ``agent1``; moves on odd turns.
        size: Passed to ``TTT``; at most ``size * size`` moves are played.
    """
    game = TTT(size)
    agents = (agent1, agent2)
    for turn in range(size * size):
        chosen_move = agents[turn % 2](game.get_state())
        game.put(chosen_move)
        if game.is_terminated():
            break
    return game.get_result()
def test_penalty_vs_penalty(self):
    """Two heavily penalised alpha-beta players should produce a win, a loss and a draw.

    Games are replayed until a positive, a negative and a zero final score
    have all been observed, or until ``max_games`` games have been played.

    Outcomes are classified by the sign of the score instead of by fixed
    value sets, so every non-zero score counts as a win or a loss.
    NOTE(review): this assumes positive scores mean mover 1 won and negative
    scores mean mover -1 won, as the previous hard-coded sets implied.
    """
    max_games = 10000  # safety cap: a regression should fail the test, not hang it

    t = TTT(3)
    player1 = ABPruning(3)
    player1.set_penalty(0.7)
    player2 = ABPruning(3)
    player2.set_penalty(0.7)

    scores = set()
    games_played = 0
    all_outcomes_seen = False
    while games_played < max_games:
        if t.is_terminated():
            scores.add(t.get_score())
            games_played += 1
            # Check whether win, draw and loss have all happened.
            won = any(score > 0 for score in scores)
            lost = any(score < 0 for score in scores)
            drawn = 0 in scores
            if won and lost and drawn:
                all_outcomes_seen = True
                break
            t = TTT(3)

        mover = t.get_mover()
        if mover == 1:
            [_, move] = player1.get(t.get_state(), mover)
            t.put(move)
        elif mover == -1:
            [_, move] = player2.get(t.get_state(), mover)
            t.put(move)

    self.assertTrue(
        all_outcomes_seen,
        f'win/draw/loss not all observed after {games_played} games; scores seen: {sorted(scores)}')
    self.assertTrue(len(scores) > 2)
def initialize_state_indices(filepath: str, size=3):
    """Index every reachable non-terminal state and pickle the ``state -> index`` table.

    Starting from a fresh ``TTT(size)`` game, a depth-first walk plays every
    available move for alternating movers. Each encoded state seen for the
    first time gets the next consecutive integer index. Terminal states are
    not descended into, so they get no index.

    Args:
        filepath: Destination of the pickled table (opened in binary write mode).
        size: Passed to ``TTT``.
    """
    # 'current' is a bookkeeping entry holding the next free index; it is
    # removed before validation and saving.
    table = {'current': 0}  # store state:index pair
    t = TTT(size)

    def dfs(state, mover: int) -> None:
        # Store the state if it is a new one.
        encoded_state = t.get_encoded_state(state)
        if encoded_state not in table:
            table[encoded_state] = table['current']
            table['current'] += 1
            assert type(table[encoded_state]) is int

        # Compare by value: an identity test against the literal -1 only
        # works through CPython's small-int caching and fails for numpy ints.
        next_mover = 1 if mover == -1 else -1
        for i in t.get_available_positions(state):
            next_state = state.copy()
            next_state[i] = mover
            if not t.is_terminated(next_state):
                dfs(next_state, next_mover)

    # Indexing.
    initial_mover = t.get_mover()
    initial_state = t.get_state()
    print('indexing start :')
    dfs(initial_state, initial_mover)

    # Simple validation: indices are consecutive, unique, plain ints.
    num_visited = table.pop('current')
    num_stored = len(table)
    print(f'visited states : {num_visited}')
    print(f'stored states : {num_stored}')
    assert num_stored == num_visited
    assert len(set(table.values())) == len(table)
    # Check every index rather than sampling position 1, which raised
    # IndexError when fewer than two states were stored.
    assert all(type(index) is int for index in table.values())

    # Save.
    print('saving... ', end='')
    with open(filepath, 'wb') as f:
        pickle.dump(table, f)
    print('done')
    return
def test_alphabeta_vs_minimax(self):
    """Alpha-beta (mover 1) against the stored minimax player (mover -1) must end with winner 0."""
    board = TTT(3)
    minimax_player = minimax_load('results/minimax.pk')
    alphabeta_player = ABPruning(3)

    move_count = 0
    print('Moves : 0 ', end='')
    while True:
        mover = board.get_mover()
        if mover == 1:
            _, best_move = alphabeta_player.get(board.get_state(), mover)
        elif mover == -1:
            _, best_move = minimax_player(board.get_state())
        board.put(best_move)
        move_count += 1
        print(f'{move_count} ', end='')
        if board.is_terminated():
            break

    print('final state')
    print(board)
    self.assertEqual(board.check_winner()['winner'], 0)
def _train_against(self, opponent_agent: Callable[[np.ndarray], int], numOfGames: int) -> None:
    """Play ``numOfGames`` games against ``opponent_agent``, updating after every opponent turn.

    Within one game the learner's (state, action) pair is recorded on its own
    turn. ``self.update`` is then called on the opponent's turn with the state
    and score observed after the opponent has replied, or with the unchanged
    terminal state if the learner's move ended the game.

    Args:
        opponent_agent: Callable mapping the game state to the opponent's move.
        numOfGames: Number of complete games to play.
    """
    learner_moves_on = self._is_first_mover
    for _ in tqdm(range(numOfGames)):
        game = TTT(self._size)
        turn = True

        # Latest (state, action) taken by the learner in this game.
        prev_encoded = None
        action = None

        while True:
            if turn is learner_moves_on:
                # Learner's turn; a finished game is only detected here.
                if game.is_terminated():
                    break
                candidates = game.get_available_positions()
                prev_encoded = game.get_encoded_state()
                action = self._epsilon_greedy_train(prev_encoded, candidates)
                game.put(action)
            else:
                # Opponent's turn; its move is treated as a random sampling step.
                if not game.is_terminated():
                    game.put(opponent_agent(game.get_state()))
                next_encoded = game.get_encoded_state()
                score = game.get_score()
                # Skip the update when the learner has not moved yet
                # (it is the second mover and this is the opening move).
                if prev_encoded is not None:
                    self.update(prev_encoded, action, next_encoded, score)
            turn = not turn
    return None
def test_minimax_vs_minimax(self):
    """The stored minimax player playing both sides must end with winner 0."""
    board = TTT(3)
    minimax_loaded = minimax_load('results/minimax.pk')

    while True:
        _, best_move = minimax_loaded(board.get_state())
        board.put(best_move)
        if board.is_terminated():
            break

    self.assertEqual(board.check_winner()['winner'], 0)
def test_alphabeta_vs_alphabeta(self):
    """One alpha-beta player playing both sides must end with winner 0."""
    board = TTT(3)
    searcher = ABPruning(3)

    move_count = 0
    print('Moves : 0 ', end='')
    while True:
        _, best_move = searcher.get(board.get_state(), board.get_mover())
        board.put(best_move)
        move_count += 1
        print(f'{move_count} ', end='')
        if board.is_terminated():
            break

    print('final state')
    print(board)
    self.assertEqual(board.check_winner()['winner'], 0)
class GameWindow(tk.Toplevel):
    """Game UI: a clickable board, a move-history slider and restart/exit buttons."""

    def __init__(self, user_first: bool, size=3, *args, **kwargs):
        """Create the window for a ``size`` x ``size`` game.

        Args:
            user_first: Whether the user (rather than the agent) makes the first move.
            size: Board edge length, passed to ``TTT``.
            *args, **kwargs: Forwarded to ``tk.Toplevel``.
        """
        super().__init__(*args, **kwargs)

        # state variables
        self._user_first = user_first
        self._t = TTT(size)
        # Declared only; the attribute does not exist until set_agent() is
        # called, and the hasattr(self, '_agent') checks below rely on that.
        self._agent: Callable[[np.ndarray], int]
        self._num_of_moves = 0
        self._state_history = [self._t.get_state()]

        # UI accessors
        self._history_scale: tk.Scale
        self._player_labels: Dict[int, tk.Label]  # key : 1,2
        self._buttons = []

        # UI initialization
        self.title('TTT')
        self._make_top_frame()
        self._make_board(size)
        self._make_bottom_frame(size)

    #region Public Methods
    def set_agent(self, agent: Callable[[np.ndarray], int], name: str) -> None:
        """Register the callable that picks the agent's moves (``name`` is accepted but unused)."""
        self._agent = agent

    def get_result(self) -> dict:
        """Return the result dict of the underlying game."""
        return self._t.get_result()
    #endregion

    #region Put UI Components
    def _make_top_frame(self):
        """Add the two labels telling which mark belongs to the user and which to the AI."""
        frame = tk.Frame(self)
        if self._user_first:
            captions = ('O : User', 'X : AI')
        else:
            captions = ('O : AI', 'X : User')
        for caption in captions:
            tk.Label(frame, text=caption).pack()
        frame.pack()

    def _make_board(self, size):
        """Create ``size * size`` board buttons laid out row by row."""
        board = tk.Frame(self)
        for index in range(size * size):
            row, column = divmod(index, size)
            # Bind the index as a default argument so each button reports its own position.
            button = tk.Button(board, width=3, height=1,
                               font=('Helvetica', 30),
                               activebackground='white',
                               command=lambda num=index: self._on_click_board(num))
            self._buttons.append(button)
            button.grid(column=column, row=row)
        board.pack()

    def _make_bottom_frame(self, size):
        """Add the history slider plus the Restart and Exit buttons (``size`` is unused)."""
        frame = tk.Frame(self)

        scale = tk.Scale(frame, command=self._on_scale_move,
                         orient='horizontal', from_=0, to=0)
        scale.grid(row=0, columnspan=2)
        self._history_scale = scale

        restart_button = tk.Button(frame, text="Restart", command=self._on_click_reset)
        exit_button = tk.Button(frame, text="Exit", command=self.destroy)
        restart_button.grid(row=1, column=0)
        exit_button.grid(row=1, column=1)
        frame.pack()
    #endregion

    #region Event Handlers
    def _on_click_board(self, position: int):
        """Play ``position``, first rewinding the game if the slider shows an earlier state."""
        shown_state_num = int(self._history_scale.get())
        if self._num_of_moves != shown_state_num:
            # Rewound: reset the game to the shown state and drop the later history.
            self._t.set_state(self._state_history[shown_state_num])
            self._num_of_moves = self._t._num_moves
            self._state_history = self._state_history[0:(self._num_of_moves + 1)]

        self._t.put(position)
        self._state_history.append(self._t.get_state())
        self._num_of_moves += 1
        self._history_scale.configure(to=self._num_of_moves)
        self._history_scale.set(self._num_of_moves)
        # [issue] If this procedure is called by button.invoke() then it
        # doesn't invoke the scale's command _on_scale_move. So call it
        # manually (and hence, it is called twice in the user's turn).
        self._on_scale_move(self._num_of_moves)

    def _on_scale_move(self, state_num):
        """Redraw the board for ``state_num`` and let the agent move if it is its turn."""
        state_num = int(state_num)
        # Even state numbers belong to the first mover.
        user_turn = (state_num % 2 == 0) == self._user_first
        self._set_board(state_num, user_turn)

        if self.get_result()['terminated']:
            return

        # The agent only moves from the latest state, never from a rewound one.
        showing_latest = state_num == len(self._state_history) - 1
        if showing_latest and not user_turn and hasattr(self, '_agent'):
            self._on_agent_turn(state_num)

    def _on_click_reset(self):
        """Restart: keep only the initial state and reset game, slider and board."""
        self._num_of_moves = 0
        self._state_history = self._state_history[0:1]
        self._t.set_state(self._state_history[0])
        self._history_scale.configure(to=0)
        self._history_scale.set(0)
        user_turn = self._user_first == True
        self._set_board(0, user_turn)
    #endregion

    #region Private Methods
    def _on_agent_turn(self, state_num: int):
        """Ask the agent for a move on state ``state_num`` and play it by invoking that button."""
        # TODO : async progress bar
        move = self._agent(self._state_history[state_num])
        chosen_button = self._buttons[move]
        # The button must be enabled before it can be invoked.
        chosen_button.configure(state='normal')
        chosen_button.invoke()

    def _set_board(self, state_num: int, user_turn: bool):
        """Modify board UI so that it shows history entry ``state_num``."""
        shown_state = self._state_history[state_num]
        result = self._t.get_result(shown_state)
        terminated = result['terminated']
        # Flatten the list of lines into one list of positions.
        line_positions = [pos for line in result['lines'] for pos in line]
        for position, cell in enumerate(shown_state):
            self._modify_button(position, int(cell), user_turn,
                                terminated, position in line_positions)

    def _modify_button(self, button_position: int, mover: int, move_allowed: bool,
                       terminated=False, of_line=False):
        """Set the text, state and colour of one board button.

        Args:
            button_position: Index of the button in ``self._buttons``.
            mover: 1 draws a circle, -1 draws a cross, anything else leaves the cell blank.
            move_allowed: Whether a blank cell should be clickable.
            terminated: When true the button is disabled regardless of the other flags.
            of_line: Whether the cell belongs to a line reported by the game result;
                such cells are coloured once the game is terminated.
        """
        marks = {1: '○', -1: '×'}
        line_colours = {1: 'steelblue', -1: 'tomato'}

        options = {
            'disabledforeground': 'black',
            'state': 'disabled',
            'text': marks.get(mover, ' '),
        }
        # A blank cell is clickable on the user's turn, or always while no agent is set.
        if mover not in marks and (move_allowed or not hasattr(self, '_agent')):
            options['state'] = 'normal'

        if terminated:
            options['state'] = 'disabled'
            if of_line and mover in line_colours:
                options['disabledforeground'] = line_colours[mover]

        self._buttons[button_position].config(**options)
    #endregion