def test_alphabeta_vs_penalty(self):
    t = TTT(3)
    player1 = ABPruning(3)
    player2 = ABPruning(3)
    player2.set_penalty(0.2)
    scores = {-4: 0, -2: 0, 0: 0, 1: 0, 3: 0, 5: 0}
    game_played = 0
    while game_played < 11:
        if t.is_terminated():
            score = t.get_score()
            scores[score] += 1
            game_played += 1
            t = TTT(3)
        mover = t.get_mover()
        if mover == 1:
            [_, move] = player1.get(t.get_state(), mover)
            t.put(move)
        elif mover == -1:
            [_, move] = player2.get(t.get_state(), mover)
            t.put(move)
    print(scores)
    # player1 plays optimally, so the penalized player2 should never win
    # (negative scores are wins for the second mover)
    wrong_cases = scores[-4] + scores[-2]
    self.assertEqual(wrong_cases, 0)

def test_diagonal1(self):
    t3 = TTT(3)
    s3 = np.array([1, 0, -1, 0, 1, -1, 0, 0, 1])
    self.assertTrue(t3.is_terminated(s3))
    self.assertEqual(t3.check_winner(s3), {'winner': 1, 'lines': [[0, 4, 8]]})
    t4 = TTT(4)
    s4 = np.array([-1, 0, 0, 1, 0, -1, 0, 1, 0, 1, -1, 0, 1, 0, 0, -1])
    self.assertTrue(t4.is_terminated(s4))
    self.assertEqual(t4.check_winner(s4), {'winner': -1, 'lines': [[0, 5, 10, 15]]})

def test_diagonal2(self):
    t3 = TTT(3)
    s3 = np.array([1, 0, -1, 1, -1, 0, -1, 1, 0])
    self.assertEqual(t3.check_winner(s3), {'winner': -1, 'lines': [[2, 4, 6]]})
    self.assertTrue(t3.is_terminated(s3))
    t4 = TTT(4)
    s4 = np.array([-1, 0, 0, 1, -1, 0, 1, 0, -1, 1, 0, 0, 1, 0, 0, 0])
    self.assertTrue(t4.is_terminated(s4))
    self.assertEqual(t4.check_winner(s4), {'winner': 1, 'lines': [[3, 6, 9, 12]]})

def __init__(self, size=3):
    self._t = TTT(size)
    self._mode = 'optimal'
    self._penalty_prob = 0

def test_as_second_mover(self):
    parameters = {
        "ep_train": 0.5,
        "ep_infer": 0,
        "gamma": 1,
        "alpha": 1,
        "agent_for": 'minimizer',
    }
    q = TabularQ(3)
    q.set_params(**parameters)
    opponent_agent = load('minimax')
    q.train(numOfGames=500, opponent_agent=opponent_agent)
    t = TTT(3)
    Q = q._Q
    updated_state_indices = np.where(Q != [0, 0, 0, 0, 0, 0, 0, 0, 0])[0]  # [0] for row indices
    updated_state_indices = set(updated_state_indices)
    for i in updated_state_indices:
        state = q.get_state(i)
        mover = t.get_mover(state=state)
        # trained as the minimizer, so every updated state must be a -1-to-move state
        self.assertEqual(mover, -1)

def test_update(self):
    t = TTT(3)
    prev_state = [[1, 1, 0], [-1, -1, 0], [0, 0, 0]]
    next_state = [[1, 1, 1], [-1, -1, 0], [0, 0, 0]]
    prev_state = np.array(prev_state).reshape(-1)
    next_state = np.array(next_state).reshape(-1)
    result = t.get_result(next_state)
    self.assertEqual(result, {'terminated': True, 'score': 5})
    q = TabularQ(3)
    q.set_params(alpha=1, gamma=1)
    encoded_prev_state = t.get_encoded_state(prev_state)
    prev_state_index = q.get_index(encoded_prev_state)
    encoded_next_state = t.get_encoded_state(next_state)
    next_state_index = q.get_index(encoded_next_state)
    # the terminal next state has no index in the Q table
    self.assertEqual(next_state_index, None)
    q.update(encoded_prev_state, 2, encoded_next_state, 5)
    updated_row = q._Q[prev_state_index, :]
    check_row = np.array_equal(updated_row, [0, 0, 5, 0, 0, 0, 0, 0, 0])
    self.assertTrue(check_row)
    # test correct inference:
    q._is_first_mover = True
    possible_moves = t.get_available_positions(prev_state)
    inferred = q.infer(encoded_prev_state, possible_moves, 1)
    self.assertEqual(inferred, 2)

def test_game1(self):
    # final board:
    #  0  1  0
    # -1  1  0
    # -1  1  0
    t = TTT(3)
    result = t.get_result()
    self.assertDictEqual(result, {'terminated': False, 'score': 0})
    t.put(1)
    result = t.get_result()
    self.assertDictEqual(result, {'terminated': False, 'score': 0})
    t.put(3)
    result = t.get_result()
    self.assertDictEqual(result, {'terminated': False, 'score': 0})
    t.put(4)
    result = t.get_result()
    self.assertDictEqual(result, {'terminated': False, 'score': 0})
    t.put(6)
    result = t.get_result()
    self.assertDictEqual(result, {'terminated': False, 'score': 0})
    t.put(7)
    result = t.get_result()
    self.assertDictEqual(result, {'terminated': True, 'score': 5})

def test_column(self):
    t4 = TTT(4)
    s4 = np.array([1, 0, 0, -1, 0, 1, 0, -1, 1, 0, 0, -1, 0, 1, 0, -1])
    self.assertEqual(t4.check_winner(s4), {'winner': -1, 'lines': [[3, 7, 11, 15]]})
    self.assertTrue(t4.is_terminated(s4))

def test_row(self):
    t3 = TTT(3)
    s3 = np.array([0, 0, 0, -1, -1, 0, 1, 1, 1])
    self.assertEqual(t3.check_winner(s3), {'winner': 1, 'lines': [[6, 7, 8]]})
    self.assertTrue(t3.is_terminated(s3))

def test_get_mover(self):
    t = TTT(3)
    s = [[0, 1, 0], [0, -1, 0], [0, 0, 0]]
    s = np.array(s)
    s = s.reshape(-1)
    mover = t.get_mover(state=s)
    self.assertTrue(mover == 1)

def initialize_minimax(filepath: str, size=3):
    table = {}
    t = TTT(size)
    minimax_save(t.get_state(), t.get_mover(), t, table)
    with open(filepath, 'wb') as f:
        pickle.dump(table, f)

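# Usage sketch (illustrative only, not part of the module). It assumes the
# pickle is written to 'results/minimax.pk', the path the minimax tests in
# this repo load from:
#
#   initialize_minimax('results/minimax.pk', size=3)
#   minimax_loaded = minimax_load('results/minimax.pk')
#   best_score, best_move = minimax_loaded(TTT(3).get_state())
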
def test_score1(self):
    t3 = TTT(3)
    s = [[1, -1, 0], [-1, 1, 0], [0, 0, 1]]
    s = np.array(s).reshape(-1)
    terminated = t3.is_terminated(s)
    score = t3.get_score(s)
    self.assertTrue(terminated)
    self.assertEqual(score, 5)

def test_minimax_1(self):
    t = TTT(3)
    state = [[1, -1, 0], [-1, 1, 0], [0, 0, 0]]
    state = np.array(state, dtype='int')
    state = state.reshape(-1)
    t._state = state
    [score, move] = minimax(t.get_state(), 1, t)
    self.assertListEqual(list(state), list(t.get_state()))
    self.assertEqual(8, move)
    self.assertEqual(5, score)

def run_game(self, agent1, agent2, size=3):
    t = TTT(size)
    for i in range(size * size):
        agent = agent1 if i % 2 == 0 else agent2
        inferred = agent(t.get_state())
        t.put(inferred)
        if t.is_terminated():
            break
    return t.get_result()

def test_alpha_beta_1(self):
    t = TTT(3)
    player = ABPruning(3)
    state = [[1, -1, 0], [-1, 1, 0], [0, 0, 0]]
    state = np.array(state, dtype='int')
    state = state.reshape(-1)
    t._state = state
    [score, move] = player.get(t.get_state(), 1)
    self.assertListEqual(list(state), list(t.get_state()))
    self.assertEqual(8, move)
    self.assertEqual(5, score)

def test_result1(self):
    t3 = TTT(3)
    s = [[1, -1, -1], [-1, 1, 1], [1, -1, 1]]
    s = np.array(s).reshape(-1)
    result = t3.get_result(s)
    to_equal = {
        'terminated': True,
        'score': 1,
        'winner': 1,
        'lines': [[0, 4, 8]]
    }
    self.assertDictEqual(result, to_equal)

def test_set_state(self):
    t = TTT(3)
    state = [1, 0, 0, 1, 1, 0, -1, -1, 0]
    t.set_state(state)
    mover = t.get_mover()
    order = t._order
    num_of_moves = t._num_moves
    _state = np.array(state, dtype=int)
    self.assertEqual(mover, -1)
    self.assertEqual(order, False)
    self.assertEqual(num_of_moves, 5)
    self.assertTrue(np.array_equal(_state, t._state))

def test_penalty_vs_penalty(self):
    t = TTT(3)
    player1 = ABPruning(3)
    player1.set_penalty(0.7)
    player2 = ABPruning(3)
    player2.set_penalty(0.7)
    games_played = 1
    scores = set()
    case1 = {1, 2, 3, 4}   # first mover wins
    case2 = {-1, -2, -3}   # second mover wins
    case3 = {0}            # draw
    while True:
        if t.is_terminated():
            score = t.get_score()
            scores.add(score)
            # stop once a win, a loss, and a draw have all been observed
            wins = case1 & scores
            loses = case2 & scores
            draw = case3 & scores
            if wins and loses and draw:
                break
            t = TTT(3)
            games_played += 1
        mover = t.get_mover()
        if mover == 1:
            [_, move] = player1.get(t.get_state(), mover)
            t.put(move)
        elif mover == -1:
            [_, move] = player2.get(t.get_state(), mover)
            t.put(move)
    self.assertTrue(len(scores) > 2)

def initialize_state_indices(filepath: str, size=3):
    table = {'current': 0}  # stores state:index pairs
    t = TTT(size)

    def dfs(state, mover: int, table=table) -> None:
        # store the state if it is a new one:
        encoded_state = t.get_encoded_state(state)
        if encoded_state not in table:
            table[encoded_state] = table['current']
            table['current'] += 1
        assert type(table[encoded_state]) is int
        next_mover = 1 if mover == -1 else -1
        available_moves = t.get_available_positions(state)
        for i in available_moves:
            next_state = state.copy()
            next_state[i] = mover
            if not t.is_terminated(next_state):
                dfs(next_state, next_mover)

    # indexing start:
    initial_mover = t.get_mover()
    initial_state = t.get_state()
    print('indexing start :')
    dfs(initial_state, initial_mover)

    # simple validation:
    num_visited = table['current']
    del table['current']
    num_stored = len(table)
    print(f'visited states : {num_visited}')
    print(f'stored states : {num_stored}')
    assert num_stored == num_visited
    indices = set(table.values())
    assert len(indices) == len(table)
    sample_index = list(table.values())[1]
    assert type(sample_index) is int

    # save:
    print('saving... ', end='')
    with open(filepath, 'wb') as f:
        pickle.dump(table, f)
    print('done')

def _train_against(self, opponent_agent: Callable[[np.ndarray], int], numOfGames: int) -> None:
    agent_q_turn = self._is_first_mover
    for _ in tqdm(range(numOfGames)):
        game = TTT(self._size)
        turn = True
        # One complete game:
        # the previous state and the action taken come from the agent's turn,
        # the next state comes from the opponent's turn,
        # so the update happens during the opponent's turn.
        encoded_prev_state = None
        move_taken = None
        encoded_next_state = None
        while True:
            if turn == agent_q_turn:
                # Q agent's turn:
                if game.is_terminated():
                    break
                possible_moves = game.get_available_positions()
                encoded_prev_state = game.get_encoded_state()
                move_taken = self._epsilon_greedy_train(encoded_prev_state, possible_moves)
                game.put(move_taken)
            else:
                # opponent's turn:
                if not game.is_terminated():
                    state = game.get_state()
                    # the opponent's move is treated as random (part of the sampling procedure):
                    move = opponent_agent(state)
                    game.put(move)
                encoded_next_state = game.get_encoded_state()
                score = game.get_score()
                if encoded_prev_state is not None:
                    # skip the update right after the first move when Q is the second mover,
                    # since there is no previous (state, action) pair yet
                    self.update(encoded_prev_state, move_taken, encoded_next_state, score)
            turn = not turn
    return None

def test_minimax_vs_minimax(self):
    size = 3
    t = TTT(size)
    filepath = 'results/minimax.pk'
    minimax_loaded = minimax_load(filepath)
    moves = 0
    while True:
        [_, best_move] = minimax_loaded(t.get_state())
        t.put(best_move)
        moves += 1
        if t.is_terminated():
            break
    self.assertEqual(t.check_winner()['winner'], 0)

def test_alphabeta_vs_alphabeta(self):
    t = TTT(3)
    player = ABPruning(3)
    moves = 0
    print('Moves : 0 ', end='')
    while True:
        [_, best_move] = player.get(t.get_state(), t.get_mover())
        t.put(best_move)
        moves += 1
        print(f'{moves} ', end='')
        if t.is_terminated():
            break
    print('final state')
    print(t)
    self.assertEqual(t.check_winner()['winner'], 0)

def _train_both(self, numOfGames):
    for _ in tqdm(range(numOfGames)):
        game = TTT(self._size)
        self._is_first_mover = True
        # one complete game:
        while True:
            encoded_prev_state = game.get_encoded_state()
            possible_moves = game.get_available_positions()
            selected_move = self._epsilon_greedy_train(encoded_prev_state, possible_moves)
            game.put(selected_move)
            encoded_next_state = game.get_encoded_state()
            result = game.get_result()
            self.update(encoded_prev_state, selected_move, encoded_next_state, result['score'])
            if result['terminated']:
                break

def __init__(self, user_first: bool, size=3, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # state variables
    self._user_first = user_first
    self._t = TTT(size)
    self._agent: Callable[[np.ndarray], int]
    self._num_of_moves = 0
    self._state_history = [self._t.get_state()]
    # UI accessors
    self._history_scale: tk.Scale
    self._player_labels: Dict[int, tk.Label]  # keys: 1, 2
    self._buttons = []
    # UI initialization
    self.title('TTT')
    self._make_top_frame()
    self._make_board(size)
    self._make_bottom_frame(size)

def test_alphabeta_vs_minimax(self):
    t = TTT(3)
    minimax_player = minimax_load('results/minimax.pk')
    alphabeta_player = ABPruning(3)
    moves = 0
    print('Moves : 0 ', end='')
    while True:
        if t.get_mover() == 1:
            [_, best_move] = alphabeta_player.get(t.get_state(), t.get_mover())
        elif t.get_mover() == -1:
            [_, best_move] = minimax_player(t.get_state())
        t.put(best_move)
        moves += 1
        print(f'{moves} ', end='')
        if t.is_terminated():
            break
    print('final state')
    print(t)
    self.assertEqual(t.check_winner()['winner'], 0)

def test_deterministic_vs_minimax(self):
    # With gamma == 1 and alpha == 1, for an end state s and an optimal move a,
    # Q(s, a) = R(s, a) whenever Q(s, a) is not 0.
    # Here, R(s, a) is the score of the terminated state.
    parameters = {
        "ep_train": 0.5,
        "ep_infer": 0,
        "gamma": 1,
        "alpha": 1,
        "agent_for": 'both',
    }
    q = TabularQ(3)
    q.set_params(**parameters)
    q.train(numOfGames=500)
    s = Settings()
    minimax = minimax_load(s.path('minimax'))
    t = TTT(3)
    Q = q._Q
    to_check_state_indices = np.where(Q != [0, 0, 0, 0, 0, 0, 0, 0, 0])[0]
    to_check_state_indices = map(int, to_check_state_indices)
    for state_index in to_check_state_indices:
        self.assertFalse(
            np.array_equal(Q[state_index], np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])))
        state = q.get_state(state_index)
        encoded_state = t.get_encoded_state(state)
        mover = t.get_mover(state=state)
        possible_moves = t.get_available_positions(state)
        if mover == 1:
            best_move_q = np.argmax(Q[state_index])
            if int(Q[state_index, best_move_q]) != 0:
                move_inferred = q.infer(encoded_state, possible_moves, mover)
                q_value_1 = Q[state_index, best_move_q]
                q_value_2 = Q[state_index, move_inferred]
                self.assertEqual(q_value_1, q_value_2)
        elif mover == -1:
            best_move_q = np.argmin(Q[state_index])
            if int(Q[state_index, best_move_q]) != 0:
                move_inferred = q.infer(encoded_state, possible_moves, mover)
                q_value_1 = Q[state_index, best_move_q]
                q_value_2 = Q[state_index, move_inferred]
                self.assertEqual(q_value_1, q_value_2)
        next_state = state.copy()
        next_state[best_move_q] = mover
        result = t.get_result(next_state)
        if result['terminated']:
            best_score, _ = minimax(state)
            q_value = Q[state_index, best_move_q]
            if best_score != q_value:
                # the (s, a) pair has not been sampled yet, or it is a draw case
                self.assertEqual(q_value, 0)
            else:
                # the (s, a) pair has been sampled
                self.assertEqual(best_score, q_value)

def load(name: str, **kwargs) -> Callable[[np.ndarray], int]:
    if name == 'minimax':
        from src.utils.path import Settings
        s = Settings()
        minimax = minimax_load(s.path('minimax'))

        def agent(state: np.ndarray) -> int:
            move = minimax(state)[1]
            return int(move)

        return agent
    elif name == 'alpha_beta':
        assert 'size' in kwargs
        assert 'penalty_prob' in kwargs
        size = kwargs.get('size')
        penalty_prob = kwargs.get('penalty_prob')
        ab = ABPruning(size)
        ab.set_penalty(penalty_prob)
        t = TTT(size)

        def agent(state: np.ndarray) -> int:
            mover = t.get_mover(state=state)
            inferred = ab.get(state=state, mover=mover)[1]
            return inferred

        return agent
    elif name == 'random':
        assert 'size' in kwargs
        import random
        size = kwargs.get('size')
        t = TTT(size)

        def agent(state: np.ndarray) -> int:
            possible_moves = t.get_available_positions(state)
            nums = len(possible_moves)
            random_index = random.randint(0, nums - 1)
            return int(possible_moves[random_index])

        return agent
    elif name == 'tabular_q':
        assert 'id' in kwargs
        id = kwargs.get('id')
        q = TabularQ.load(id)
        size = q._size
        t = TTT(size)

        def agent(state: np.ndarray) -> int:
            possible_moves = t.get_available_positions(state)
            encoded_state = t.get_encoded_state(state)
            mover = t.get_mover(state=state)
            inferred = q.infer(encoded_state, possible_moves, mover)
            return inferred

        return agent
    else:
        raise NameError(f'{name} is not implemented')

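# Usage sketch (illustrative only, not part of the module). The keyword
# arguments follow the assertions in load() above; the penalty value 0.2 is
# an arbitrary example:
#
#   random_agent = load('random', size=3)
#   ab_agent = load('alpha_beta', size=3, penalty_prob=0.2)
#   move = ab_agent(TTT(3).get_state())
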
def test_availables(self):
    t3 = TTT(3)
    s3 = [[1, -1, 0], [0, 1, -1], [1, -1, 0]]
    s3 = np.array(s3).reshape(-1)
    indices = t3.get_available_positions(s3)
    self.assertListEqual(indices, [2, 3, 8])

def test_state_encode(self):
    t3 = TTT(3)
    encoded01 = t3.get_encoded_state()
    state02 = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int)
    encoded02 = t3.get_encoded_state(state02)
    self.assertEqual(encoded01, encoded02)