def print_q_values(self):
    print(f"num q_values = {len(self.qtable.cache)}")
    for k, v in self.qtable.cache.items():
        b = np.frombuffer(k, dtype=int)
        board = Board(b)
        board.print_board()
        print(f"qvalue = {v}")
def test_get_game_result_not_over():
    b = np.array([[1, 1, -1],
                  [0, -1, 0],
                  [1, -1, 1]]).flatten()
    board = Board(b)

    result = board.get_game_result()

    assert result == RESULT_NOT_OVER
def test_get_game_result_o_wins():
    b = np.array([[1, 0, -1],
                  [0, -1, 1],
                  [-1, 0, 1]]).flatten()
    board = Board(b)

    result = board.get_game_result()

    assert result == RESULT_O_WINS
def test_get_game_result_draw():
    b = np.array([[1, 1, -1],
                  [-1, -1, 1],
                  [1, -1, 1]]).flatten()
    board = Board(b)

    result = board.get_game_result()

    assert result == RESULT_DRAW
def test_get_position_value_from_cache():
    b = np.array([[1, 0, 0],
                  [1, -1, 1],
                  [0, 0, -1]]).flatten()

    value, found = cache.get_for_position(Board(b))
    assert (value, found) == (None, False)

    cache.set_for_position(Board(b), -1)

    (value, _), found = cache.get_for_position(Board(b))
    assert (value, found) == (-1, True)
def test_play_minimax_move_o_wins_in_best_case():
    b = np.array([[1, 0, 0],
                  [1, -1, 1],
                  [0, 0, -1]]).flatten()

    result = play_minimax_move(Board(b)).board

    assert np.array_equal(result, np.array([[1, 0, 0],
                                            [1, -1, 1],
                                            [-1, 0, -1]]).flatten())
def test_choose_move_index_with_transformation():
    b_2d = np.array([[1, 0, 0],
                     [1, -1, 1],
                     [-1, 1, -1]])
    b = b_2d.flatten()
    board = Board(b)

    q_table = QTable()
    q_table.update_q_value(board, 1, -1)
    q_table.update_q_value(board, 2, 1)

    b_transformed = np.rot90(b_2d, 2).flatten()
    board_transformed = Board(b_transformed)

    move_index = choose_move_index([q_table], board_transformed, 0)

    assert move_index == 6
def transform_board_and_qvalues(board, q_values, transform):
    b_2d_transformed = transform.transform(board.board_2d)

    q_2d = load_q_values_into_2d_board(q_values)
    q_2d_transformed = transform.transform(q_2d)
    q_values_transformed = {mi: qv for mi, qv
                            in enumerate(q_2d_transformed.flatten())
                            if not np.isnan(qv)}

    return Board(b_2d_transformed.flatten()), q_values_transformed
def test_convert_to_tensor():
    b = np.array([[1, 0, 0],
                  [1, -1, 1],
                  [-1, 1, -1]]).flatten()
    board = Board(b)

    t = convert_to_tensor(board)

    t_expected = torch.tensor([1., 0., 0., 1., -1., 1., -1., 1., -1.])

    assert torch.all(torch.eq(t_expected, t))
def test_choose_move_index_2nd_move():
    b = np.array([[1, 0, 0],
                  [1, -1, 1],
                  [-1, 1, -1]]).flatten()
    board = Board(b)

    q_table = QTable()
    q_table.update_q_value(board, 1, 0.5)
    q_table.update_q_value(board, 2, 1)

    action_index = choose_move_index([q_table], board, 0)

    assert action_index == 2
def update_q_value(self, board, move_index, qvalue):
    new_position = board.play_move(move_index)
    result, found = self.qtable.get_for_position(new_position)
    if found is True:
        _, t = result
        new_position_transformed = Board(
            t.transform(new_position.board_2d).flatten())
        self.qtable.set_for_position(new_position_transformed, qvalue)
        return

    self.qtable.set_for_position(new_position, qvalue)
def test_get_q_values_initial_o_turn():
    b = np.array([[1, 0, -1],
                  [1, 0, -1],
                  [1, 0, 0]]).flatten()

    q_table = QTable()
    q_values = q_table.get_q_values(Board(b))

    expected_q_values = {1: INITIAL_Q_VALUES_FOR_O, 4: INITIAL_Q_VALUES_FOR_O,
                         7: INITIAL_Q_VALUES_FOR_O, 8: INITIAL_Q_VALUES_FOR_O}

    assert q_values == expected_q_values
def test_update_q_value():
    qtable = QTable()
    b_2d = np.array([[1.0,  0.0,  0.0],
                     [1.0, -1.0,  0.0],
                     [0.0,  1.0, -1.0]])
    b = b_2d.flatten()
    board = Board(b)

    qvalues = qtable.get_q_values(board)
    init = INITIAL_Q_VALUES_FOR_O
    expected_qvalues = {1: init, 2: init, 5: init, 6: init}
    assert qvalues == expected_qvalues

    b_rot90_flipud_2d = np.flipud(np.rot90(b_2d))
    b_rot90_flipud = b_rot90_flipud_2d.flatten()
    board_rot90_flipud = Board(b_rot90_flipud)

    qtable.update_q_value(board_rot90_flipud, 2, 0.8)
    qtable.update_q_value(board_rot90_flipud, 7, 0.7)

    assert len(qtable.qtable.cache) == 2

    expected_qvalues = {1: init, 2: init, 5: 0.7, 6: 0.8}
    qvalues = qtable.get_q_values(board)
    assert qvalues == expected_qvalues

    expected_qvalues = {2: 0.8, 3: init, 6: init, 7: 0.7}
    qvalues = qtable.get_q_values(board_rot90_flipud)
    assert qvalues == expected_qvalues
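# A quick check (outside the test above) of the symmetry index mapping it
# relies on: applying the same flipud(rot90(.)) transform to a 3x3 grid of
# cell indexes shows which original cell each transformed move index lands on.
idx = np.arange(9).reshape(3, 3)
idx_transformed = np.flipud(np.rot90(idx)).flatten()
# idx_transformed is [0, 3, 6, 1, 4, 7, 2, 5, 8], so move 2 on the transformed
# board corresponds to cell 6 on the original board, and move 7 to cell 5 --
# matching the 0.8 and 0.7 entries asserted in the test.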
def test_play_qneural_move():
    net = TicTacNet()
    target_net = TicTacNet()
    sgd = torch.optim.SGD(net.parameters(), lr=0.1, weight_decay=0)
    loss_function = MSELoss()
    net_context = NetContext(net, target_net, sgd, loss_function)

    play = create_qneural_player(net_context)

    b = np.array([[1, 0, 0],
                  [1, -1, 1],
                  [-1, 1, -1]]).flatten()
    board = Board(b)
    updated_board = play(board)

    assert np.array_equal(updated_board.board,
                          np.array([1, -1, 0, 1, -1, 1, -1, 1, -1]))
def test_get_move_average_q_value_pairs():
    qtable_a = QTable()
    qtable_b = QTable()
    b_2d = np.array([[1, 0, 0],
                     [1, -1, 1],
                     [-1, 1, -1]])
    b = b_2d.flatten()
    board = Board(b)

    qtable_a.update_q_value(board, 1, 0.0)
    qtable_a.update_q_value(board, 2, 1.0)
    qtable_b.update_q_value(board, 1, -0.5)
    qtable_b.update_q_value(board, 2, 0.5)

    pairs = get_move_average_q_value_pairs([qtable_a, qtable_b], board)

    assert pairs == [(1, -0.25), (2, 0.75)]
import numpy as np
import torch
from torch.nn import MSELoss

# Board and play_random_move are assumed to live in tictac.board.
from tictac.board import Board, play_random_move
from tictac.minimax import create_minimax_player
from tictac.qneural import (TicTacNet, NetContext, create_qneural_player,
                            get_q_values, play_training_games_x,
                            play_training_games_o)

play_minimax_move_randomized = create_minimax_player(True)
play_minimax_move_not_randomized = create_minimax_player(False)

policy_net = TicTacNet()
target_net = TicTacNet()
sgd = torch.optim.SGD(policy_net.parameters(), lr=0.1)
loss = MSELoss()
net_context = NetContext(policy_net, target_net, sgd, loss)

with torch.no_grad():
    board = Board(np.array([1, -1, -1, 0, 1, 1, 0, 0, -1]))
    q_values = get_q_values(board, net_context.target_net)
    print(f"Before training q_values = {q_values}")

print("Training qlearning X vs. random...")
play_training_games_x(net_context=net_context,
                      o_strategies=[play_random_move])
print("Training qlearning O vs. random...")
play_training_games_o(net_context=net_context,
                      x_strategies=[play_random_move])
print("")

with torch.no_grad():
    play_qneural_move = create_qneural_player(net_context)

    print("Playing qneural vs random:")
    print("--------------------------")
def test_get_random_valid_move():
    b = np.array([0, -1, 0, 0, -1, 0, 1, 0, 1])

    move = Board(b).get_random_valid_move_index()

    assert move in [0, 2, 3, 5, 7]
def test_get_position_value_o_wins():
    b = np.array([[1, 0, -1],
                  [1, 0, -1],
                  [0, 1, -1]]).flatten()

    value = get_position_value(Board(b))

    assert value == RESULT_O_WINS
def test_get_position_value_draw_is_best_case():
    b = np.array([[1, -1, 0],
                  [1, 1, -1],
                  [-1, 1, -1]]).flatten()

    value = get_position_value(Board(b))

    assert value == RESULT_DRAW
def test_get_position_value_o_wins_in_best_case_o_turn():
    b = np.array([[1, 0, 0],
                  [1, -1, 1],
                  [0, 0, -1]]).flatten()

    value = get_position_value(Board(b))

    assert value == RESULT_O_WINS
def test_get_move_value_pairs_for_position_o_wins_in_best_case():
    b = np.array([[1, 0, 0],
                  [1, -1, 1],
                  [0, 0, -1]]).flatten()

    move_value_pairs = get_move_value_pairs(Board(b))

    assert move_value_pairs == [(1, 1), (2, 1), (6, -1), (7, 1)]
def test_play_training_game_x_player():
    q_table = QTable()
    move_history = deque()
    q_table_player = CELL_X
    x_strategy = create_training_player([q_table], move_history, 0)
    o_strategy = play_random_move

    # First training game with a fresh move history.
    play_training_game([q_table], move_history, q_table_player, x_strategy,
                       o_strategy, 0.9, 1)

    init = INITIAL_Q_VALUES_FOR_X

    first_board = np.copy(new_board)
    val = 0.9 * 0.81
    expected_move_indexes_and_q_values = {0: val, 1: init, 2: val, 3: init,
                                          4: init, 5: init, 6: val, 7: init,
                                          8: val}
    move_indexes_and_q_values = q_table.get_q_values(Board(first_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    second_board = np.copy(first_board)
    second_board[0] = CELL_X
    second_board[7] = CELL_O
    val = 0.9 * 0.9
    expected_move_indexes_and_q_values = {1: val, 2: init, 3: init, 4: init,
                                          5: init, 6: init, 8: init}
    move_indexes_and_q_values = q_table.get_q_values(Board(second_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    third_board = np.copy(second_board)
    third_board[1] = CELL_X
    third_board[5] = CELL_O
    val = 0.9 * 1.0
    expected_move_indexes_and_q_values = {2: val, 3: init, 4: init, 6: init,
                                          8: init}
    move_indexes_and_q_values = q_table.get_q_values(Board(third_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    # Second training game with the same q_table and a fresh move history.
    move_history = deque()
    x_strategy = create_training_player([q_table], move_history, 0)
    play_training_game([q_table], move_history, q_table_player, x_strategy,
                       o_strategy, 0.9, 1)

    init = INITIAL_Q_VALUES_FOR_X

    first_board = np.copy(new_board)
    val = 0.1 * (0.81 * 0.9) + 0.9 * (0.9 * (0.9 * 0.81))
    expected_move_indexes_and_q_values = {0: val, 1: init, 2: val, 3: init,
                                          4: init, 5: init, 6: val, 7: init,
                                          8: val}
    move_indexes_and_q_values = q_table.get_q_values(Board(first_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    second_board = np.copy(first_board)
    second_board[0] = CELL_X
    second_board[1] = CELL_O
    val = 0.9 * (0.9 * 0.81)
    expected_move_indexes_and_q_values = {2: val, 3: init, 4: init, 5: init,
                                          6: init, 7: init, 8: init}
    move_indexes_and_q_values = q_table.get_q_values(Board(second_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    third_board = np.copy(second_board)
    third_board[2] = CELL_X
    third_board[5] = CELL_O
    val = 0.9 * 0.81
    expected_move_indexes_and_q_values = {3: val, 4: init, 6: init, 7: init,
                                          8: init}
    move_indexes_and_q_values = q_table.get_q_values(Board(third_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    fourth_board = np.copy(third_board)
    fourth_board[3] = CELL_X
    fourth_board[8] = CELL_O
    val = 0.81
    expected_move_indexes_and_q_values = {4: val, 6: init, 7: init}
    move_indexes_and_q_values = q_table.get_q_values(Board(fourth_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    fifth_board = np.copy(fourth_board)
    fifth_board[4] = CELL_X
    fifth_board[7] = CELL_O
    val = 0.9
    expected_move_indexes_and_q_values = {6: val}
    move_indexes_and_q_values = q_table.get_q_values(Board(fifth_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values
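# A reading of the expected values in the test above (not part of the test
# itself). The numbers are consistent with the standard tabular Q-learning
# update, under the assumptions that play_training_game's last two arguments
# are the learning rate (0.9) and the discount factor (1), that
# INITIAL_Q_VALUES_FOR_X is 0, and that a win is rewarded with 1:
#     Q(s, a) <- (1 - alpha) * Q(s, a) + alpha * (reward + gamma * max Q(s', a'))
# Backing up from X's winning move in the first game:
#     winning move:      0.9 * 1.0   = 0.9
#     move before that:  0.9 * 0.9   = 0.81
#     first move:        0.9 * 0.81  = 0.729
# In the second game the first move's previously learned value (0.81 * 0.9)
# is blended with the new backup, giving
#     0.1 * (0.81 * 0.9) + 0.9 * (0.9 * (0.9 * 0.81))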
def perform_training_playouts(node_cache=nodecache, board=Board(),
                              num_playouts=4000, display_progress=True):
    for game in range(num_playouts):
        perform_game_playout(node_cache, board)
        if display_progress is True and (game+1) % (num_playouts / 10) == 0:
            print(f"{game+1}/{num_playouts} playouts...")
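# A minimal usage sketch (not from the test suite): run MCTS playouts from a
# fresh cache and starting board, then inspect the root node's statistics.
# It assumes BoardCache, Board and find_or_create_node are importable exactly
# as they are used in the tests in this file.
cache = BoardCache()
start = Board()
perform_training_playouts(node_cache=cache, board=start,
                          num_playouts=1000, display_progress=False)
root = find_or_create_node(cache, start)
print(f"visits={root.visits}, wins={root.wins}, "
      f"draws={root.draws}, losses={root.losses}")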
def test_play_training_game_o_player():
    q_table = QTable()
    move_history = deque()
    q_table_player = CELL_O
    x_strategy = play_random_move
    o_strategy = create_training_player([q_table], move_history, 0)

    # First training game with a fresh move history.
    play_training_game([q_table], move_history, q_table_player, x_strategy,
                       o_strategy, 0.9, 1)

    init = INITIAL_Q_VALUES_FOR_O

    first_board = np.copy(new_board)
    first_board[6] = CELL_X
    val = 0.9 * 0.81
    expected_move_indexes_and_q_values = {0: val, 1: init, 2: init, 3: init,
                                          4: init, 5: init, 7: init, 8: val}
    move_indexes_and_q_values = q_table.get_q_values(Board(first_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    second_board = np.copy(first_board)
    second_board[0] = CELL_O
    second_board[8] = CELL_X
    val = 0.9 * 0.9
    expected_move_indexes_and_q_values = {1: val, 2: init, 3: init, 4: init,
                                          5: init, 7: init}
    move_indexes_and_q_values = q_table.get_q_values(Board(second_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    third_board = np.copy(second_board)
    third_board[1] = CELL_O
    third_board[5] = CELL_X
    val = 0.9 * 1.0
    expected_move_indexes_and_q_values = {2: val, 3: init, 4: init, 7: init}
    move_indexes_and_q_values = q_table.get_q_values(Board(third_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    # Second training game with the same q_table and a fresh move history.
    move_history = deque()
    o_strategy = create_training_player([q_table], move_history, 0)
    play_training_game([q_table], move_history, q_table_player, x_strategy,
                       o_strategy, 0.9, 1)

    init = INITIAL_Q_VALUES_FOR_O

    first_board = np.copy(new_board)
    first_board[0] = CELL_X
    val = (1 - 0.9) * (0.9 * 0.81) + (0.9 * 0.0)
    expected_move_indexes_and_q_values = {1: init, 2: val, 3: init, 4: init,
                                          5: init, 6: val, 7: init, 8: init}
    move_indexes_and_q_values = q_table.get_q_values(Board(first_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values

    second_board = np.copy(first_board)
    second_board[2] = CELL_O
    second_board[4] = CELL_X
    val = 0.9 * -1
    expected_move_indexes_and_q_values = {1: val, 3: init, 5: init, 6: init,
                                          7: init, 8: init}
    move_indexes_and_q_values = q_table.get_q_values(Board(second_board))
    assert move_indexes_and_q_values == expected_move_indexes_and_q_values
def test_play_mcts_move():
    b_2d = np.array([[1,  1,  0],
                     [1, -1,  0],
                     [-1, 1, -1]])
    b = b_2d.flatten()
    board = Board(b)

    nc = BoardCache()

    parent_node = find_or_create_node(nc, board)
    actual_stats = (parent_node.visits, parent_node.wins, parent_node.draws,
                    parent_node.losses)
    assert actual_stats == (0, 0, 0, 0)

    values = calculate_values(nc, board)
    expected_values = [(2, math.inf), (5, math.inf)]
    assert list(values) == expected_values

    perform_game_playout(nc, board)

    actual_stats = (parent_node.visits, parent_node.wins, parent_node.draws,
                    parent_node.losses)
    assert actual_stats == (1, 0, 0, 1)

    child_node_2 = find_or_create_node(nc, board.play_move(2))
    actual_stats = (child_node_2.visits, child_node_2.wins,
                    child_node_2.draws, child_node_2.losses)
    assert actual_stats == (1, 1, 0, 0)

    child_node_5 = find_or_create_node(nc, board.play_move(5))
    actual_stats = (child_node_5.visits, child_node_5.wins,
                    child_node_5.draws, child_node_5.losses)
    assert actual_stats == (0, 0, 0, 0)

    values = calculate_values(nc, board)
    expected_values = [(2, 1.0), (5, math.inf)]
    assert list(values) == expected_values

    perform_game_playout(nc, board)

    actual_stats = (parent_node.visits, parent_node.wins, parent_node.draws,
                    parent_node.losses)
    assert actual_stats == (2, 1, 0, 1)

    actual_stats = (child_node_2.visits, child_node_2.wins,
                    child_node_2.draws, child_node_2.losses)
    assert actual_stats == (1, 1, 0, 0)

    actual_stats = (child_node_5.visits, child_node_5.wins,
                    child_node_5.draws, child_node_5.losses)
    assert actual_stats == (1, 0, 0, 1)

    values = calculate_values(nc, board)
    expected_values = [(2, 2.177410022515475), (5, 1.1774100225154747)]
    assert list(values) == expected_values

    perform_training_playouts(nc, board, 100, False)

    actual_stats = (parent_node.visits, parent_node.wins, parent_node.draws,
                    parent_node.losses)
    assert actual_stats == (102, 6, 0, 96)

    actual_stats = (child_node_2.visits, child_node_2.wins,
                    child_node_2.draws, child_node_2.losses)
    assert actual_stats == (96, 96, 0, 0)

    actual_stats = (child_node_5.visits, child_node_5.wins,
                    child_node_5.draws, child_node_5.losses)
    assert actual_stats == (6, 0, 0, 6)

    values = calculate_values(nc, board)
    expected_values = [(2, 1.3104087632087014), (5, 1.2416350528348057)]
    assert list(values) == expected_values
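# Sanity check (not part of the test above) of the two values asserted after
# the second playout. They match the standard UCT formula with exploration
# constant sqrt(2), which this note assumes calculate_values uses:
#     value = wins / visits + sqrt(2) * sqrt(ln(parent_visits) / visits)
# With parent_visits = 2 and each child visited once:
#     move 2: 1/1 + sqrt(2 * ln(2)) ~= 2.1774100225154747
#     move 5: 0/1 + sqrt(2 * ln(2)) ~= 1.1774100225154747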
def play():
    """ The process of playing a game """
    b = Board()
    print(b)
    result = b.check_the_Board()
    while result:
        player1 = 1
        player2 = 2
        if b.check_haveplace() != 0:
            print('Player 1')
            try:
                first = get_coords()
            except ValueError as e:
                print(e)
                first = get_coords()
            if first != False:
                try:
                    b.put_move(1, first)
                except IndexError as e:
                    print(e)
                    print('Player 1')
                    first = get_coords()
                    b.put_move(1, first)
                print(b)
                if b.check_the_Board() in ['EQUITY noone won',
                                           'winner is first player',
                                           'winner is second player']:
                    result = False
        if b.check_haveplace() != 0 and result != False:
            print('Player 2')
            try:
                second = get_coords()
            except ValueError as e:
                print(e)
                second = get_coords()
            if second != False:
                try:
                    b.put_move(2, second)
                except IndexError as e:
                    print(e)
                    print('Player 2')
                    second = get_coords()
                    b.put_move(2, second)
                print(b)
                if b.check_the_Board() in ['EQUITY noone won',
                                           'winner is first player',
                                           'winner is second player']:
                    result = False
        if b.check_haveplace() == 0:
            result = False
    print(b.check_the_Board())
def test_get_valid_move_indexes():
    board = Board(np.array([0, -1, 0, 0, -1, 0, 1, 0, 1]))

    valid_indexes = board.get_valid_move_indexes()

    assert valid_indexes == [0, 2, 3, 5, 7]