class TestWinner(unittest.TestCase):
    """Tests for TicTacToe.winner() across rows, columns and diagonals.

    NOTE: these boards use the digit "0" (zero) as the second mark,
    matching the original test data.
    """

    def setUp(self):
        self.game_unittest = TicTacToe()

    def test_row_unittest(self):
        print("Checking row")
        self.game_unittest.board = [
            "X", "0", "X",
            " ", "0", "0",
            "X", "0", "X"
        ]
        self.assertTrue(self.game_unittest.winner(2, "0"))
        print(self.game_unittest.board)

    def test_column_unittest(self):
        print("Checking column")
        self.game_unittest.board = [
            "X", "0", "X",
            " ", "0", "0",
            "0", "0", "X"
        ]
        self.assertTrue(self.game_unittest.winner(5, "0"))
        print(self.game_unittest.board)

    def test_diagonal_unittest(self):
        print("Checking diagonal")
        self.game_unittest.board = [
            "X", "0", "X",
            " ", "X", "0",
            "0", "0", "X"
        ]
        self.assertTrue(self.game_unittest.winner(8, "X"))
        print(self.game_unittest.board)

    def test_other_conditions(self):
        # FIX 1: renamed with the ``test_`` prefix so unittest discovery
        # actually runs this case (it was previously never executed).
        # FIX 2: the original board literal was missing commas, so adjacent
        # string literals fused ("X" "0" -> "X0") and the board had only
        # 7 elements instead of 9.
        print("None of checks are true")
        self.game_unittest.board = [
            "X", "0", "X",
            "0", "X", "0",
            "0", "X", " "
        ]
        self.assertFalse(self.game_unittest.winner(1, "X"))
def chose_action(self, board_state_hash):
    """Prompt the human player for a move on the board encoded by
    *board_state_hash* and return it as an (x, y) integer pair."""
    state = State.hash_to_state(board_state_hash)
    action = ''
    while not action:
        # Re-show the board until the user types something.
        TicTacToe.print_board(state)
        action = input('Your move? i.e. x,y : ')
    parts = action.split(',')
    return (int(parts[0]), int(parts[1]))
def main():
    """Run a single game of TicTacToe, taking turns until it is over."""
    game = TicTacToe()
    game.start()
    while True:
        if game.over:
            break
        game.turn()
def __init__(self, ep=np.sqrt(2), numplayouts=20, movetime=45):
    """Configure the MCTS player.

    :param ep: UCT exploration constant (default sqrt(2))
    :param numplayouts: playouts per leaf evaluation
    :param movetime: seconds allotted per move
    """
    # Tunable search parameters.
    self.ep = ep
    self.numpl = numplayouts
    self.movet = movetime
    # Fresh game tree plus an internal self-play game used for playouts.
    self.gt = GameTree()
    self.p1 = SophisticatedRandomPlayer()
    self.game = TicTacToe(self.p1, self.p1, verbose=False)
def end_of_episode(self, winner, board_state_hash):
    """Print the final board and announce the result of the game."""
    state = State.hash_to_state(board_state_hash)
    TicTacToe.print_board(state)
    if winner == -1:
        outcome = 'It was a draw.'
    else:
        outcome = 'Winner: Player {0}'.format(winner)
    print('Game over! ' + outcome)
class Test3(unittest.TestCase):
    """Smoke test: a computer-vs-computer game must finish in a win or tie."""

    def setUp(self):
        print('Starting a new test ...')
        self.game1 = TicTacToe(
            configparam=('C', 'C', "X", "O", 1, 0, 'ComputerVComputer'))
        self.game1.start_game()

    def test_game_1_0(self):
        # The completed game must have reached a terminal state.
        self.assertIn(self.game1.board.game_state, ["win", "tie"])
def train_agent(iterations):
    """Self-play training loop for the Q-learning agent.

    Plays *iterations* games (one Agent instance plays both seats,
    alternating who moves first), appends win/loss/draw counts to
    results.csv every 100 games, and dumps the learned policy to CSV at
    the end -- or on crash / Ctrl-C so progress is never lost.
    """
    # Construct the agents OUTSIDE the try: the handlers reference p1/p2,
    # and the original could hit NameError if construction itself raised.
    p1 = Agent(
        1,     # Player #
        -10,   # Loss Value
        0.2,   # Epsilon (exploration rate)
        0.2,   # Alpha (learning rate)
        0.9,   # Gamma (make infinite sum finite) - https://stats.stackexchange.com/questions/221402/understanding-the-role-of-the-discount-factor-in-reinforcement-learning
        True,  # Update learner
    )
    p2 = p1  # self-play: both seats share one learner
    # p2 = Agent(2, -1, 1, 0.1, 0.9, False)  # Random Agent
    try:
        series = ['P1_Wins', 'P2_Wins', 'Draw']
        # ``with`` guarantees the results file is closed even on a crash
        # (the original leaked the handle).
        with open('results.csv', 'w') as f:
            writer = csv.writer(f)
            writer.writerow(series)
            p1_wins = p2_wins = draw = 0
            for i in trange(iterations):
                game_no = i + 1  # replaces the hand-maintained ``iterator``
                # Alternate who moves first each game.
                if game_no % 2 == 0:
                    winner, board_state_hash = TicTacToe.play(p1, p2)
                else:
                    winner, board_state_hash = TicTacToe.play(p2, p1)
                if winner == -1:
                    draw += 1
                elif winner == 1:
                    p1_wins += 1
                elif winner == 2:
                    p2_wins += 1
                # Flush a row of counters every 100 games, then reset.
                if game_no % 100 == 0:
                    writer.writerow([p1_wins, p2_wins, draw])
                    p1_wins = p2_wins = draw = 0
        print('Saving policy')
        p1.dump_policy_to_csv('p1_policy.csv')
        p2.dump_policy_to_csv('p2_policy.csv')
    except KeyboardInterrupt:
        # KeyboardInterrupt is not a subclass of Exception, but listing the
        # specific handler first makes the intent explicit.
        print("Program crashed - saving policy...")
        p1.dump_policy_to_csv('p1_policy.csv')
        p2.dump_policy_to_csv('p2_policy.csv')
    except Exception as e:
        print(str(e))
        print("Program crashed - saving policy...")
        p1.dump_policy_to_csv('p1_policy.csv')
        # Was missing in the original: p2's policy file was lost on crash.
        p2.dump_policy_to_csv('p2_policy.csv')
def playHalf(p1, p2, numG, games):
    """Play *numG* games of p1 vs p2 and tally the outcomes.

    Returns a 3-element array indexed by (winner + 1) -- winner is assumed
    to be in {-1, 0, 1}; verify against TicTacToe.play()'s encoding.

    NOTE(review): this function references ``self.VERBOSE`` and ``numGames``
    which are not parameters -- it presumably is (or was) nested inside a
    method whose closure supplies them; confirm before reusing it standalone.
    """
    t = TicTacToe(p1, p2, verbose=self.VERBOSE)
    result = np.zeros(3)
    for g in range(numG):
        # Progress update roughly every 5% of the overall game count;
        # ``games`` looks like an offset into a larger run -- TODO confirm.
        if g % math.ceil(0.05 * numGames) == 0:
            progress(games + g, numGames, status="Playing Games")
            print()  # newline after the progress bar -- placement inferred from collapsed source
        winner = t.play()
        t.reset()
        # Shift winner from {-1, 0, 1} to {0, 1, 2} for array indexing.
        result[winner + 1] += 1
    return result
class TestGame(unittest.TestCase):
    """Board-state tests: move availability and move placement."""

    def setUp(self):
        # Pre-occupy the centre square before every test.
        self.game = TicTacToe()
        self.game.board[4] = "X"

    def test_available_moves(self):
        # The occupied centre must not be offered as a move.
        self.assertNotIn(4, self.game.available_moves())

    def test_make_move(self):
        # A taken square is rejected; a free square is accepted and recorded.
        self.assertFalse(self.game.make_move(4, "X"))
        self.assertTrue(self.game.make_move(5, "X"))
        self.assertNotEqual(self.game.board[5], " ")
def __init__(self, parent=None):
    """Set up the dialog UI and wire the nine cell buttons plus reset."""
    QtWidgets.QWidget.__init__(self, parent)
    self.ui = Ui_Dialog()
    self.ui.setupUi(self)
    ui = self.ui
    self.btns = [
        ui.pb_1, ui.pb_2, ui.pb_3,
        ui.pb_4, ui.pb_5, ui.pb_6,
        ui.pb_7, ui.pb_8, ui.pb_9,
    ]
    for index in range(len(self.btns)):
        # Bind the index as a default argument so each button keeps its own cell.
        self.btns[index].clicked.connect(lambda _, b=index: self.pb(b))
    ui.pb_reset.clicked.connect(self.reset)
    self.tic = TicTacToe()
def start_game(self, player1: Human, player2: Union[Human, Bot]) -> None:
    """
    Chooses game mode (humans or human vs bot) and replays until declined.
    :param player1: Player
    :param player2: Player
    :return: None
    """
    if isinstance(player2, Human):
        self.game = TicTacToe(player1=player1, player2=player2)
    else:
        self.game = TicTacToe(player1=player1, bot=player2)
    # Keep replaying until game_loop reports the player answered 'n'.
    while True:
        if self.game_loop(player1, player2) == 'n':
            break
def iterate(self, game: TicTacToe, train: bool = False, print_q: bool = False):
    """Choose one move (random, or Q-optimal when self.ai), prepend it to
    the move history, and play it on *game*."""
    moves = game.get_legal_moves()
    move = self.random_move(moves)
    field_hash = game.get_hash()
    if self.ai:
        move = self.get_optimal_move(moves, field_hash, print_q, train)
    # Rebind (not mutate) history so any prior alias is left untouched,
    # newest entry first.
    entry = {"hash": field_hash, "move": str(move)}
    self.history = [entry] + self.history
    game.input(int(move))
def activate(self, inputs):
    """Score all nine cells: a random positive weight for each legal move,
    zero for occupied cells."""
    game = TicTacToe._createFromState(inputs)
    possibles = game.possibleMoves()
    scores = []
    for index in range(9):
        if index in possibles:
            scores.append(random.random())
        else:
            scores.append(0.0)
    return scores
def main():
    """Run the pygame TicTacToe UI: alternate turns between the two
    configured players until the game produces a winner."""
    pygame.init()
    clock = pygame.time.Clock()
    # Create game
    tictactoe = TicTacToe()
    board = Board(tictactoe, **BOARD_ARGS)
    # Initialize players: each entry is (name, strategy-instance).
    player0 = (PLAYER0, strategy.load_player(PLAYER0)(tictactoe))
    player1 = (PLAYER1, strategy.load_player(PLAYER1)(tictactoe))
    ACTIVE_PLAYER = player0
    PASIVE_PLAYER = player1
    pygame.display.update()
    # Start games
    while True:
        # winner() == -2 appears to mean "game still in progress" -- TODO confirm.
        if tictactoe.winner() == -2:
            if ACTIVE_PLAYER[0] == USER:
                # Human turn: drive moves from mouse clicks.
                for event in pygame.event.get():
                    if event.type == QUIT:
                        pygame.quit()
                        sys.exit()
                    elif event.type == MOUSEBUTTONUP:
                        x, y = event.pos
                        # Only swap turns when the click landed on a legal cell.
                        if board.process_click(x, y):
                            ACTIVE_PLAYER, PASIVE_PLAYER = PASIVE_PLAYER, ACTIVE_PLAYER
            else:
                # AI turn: ask the strategy for a move on a board copy.
                pos = ACTIVE_PLAYER[1].getmove(tictactoe.copy())
                if not board.play_turn(pos):
                    raise ValueError("{}: \nInvalid move:{}".format(
                        ACTIVE_PLAYER[0], str(pos)))
                ACTIVE_PLAYER, PASIVE_PLAYER = PASIVE_PLAYER, ACTIVE_PLAYER
        else:
            # Game over: keep pumping events so the window stays responsive
            # and can still be closed.
            for event in pygame.event.get():
                if event.type == QUIT:
                    pygame.quit()
                    sys.exit()
        pygame.display.update()
        clock.tick(30)  # cap the loop at 30 FPS
def main(rounds):
    """Train a Q-learning TicTacToe policy for *rounds* episodes.

    Uses module-level globals: ``ticTacToe`` (re-created each episode) and
    ``wins`` / ``losses`` (progress counters, reset every 10000 episodes).
    Pickles the final Q-table and greedy policy to disk.
    """
    global ticTacToe
    global wins
    global losses
    initial_q_value = 0.0
    alpha = 0.5  # Step size
    gamma = 1.0  # Discount factor
    epsilon = 0.2  # Exploration rate
    # All nine grid coordinates; every state starts at the same Q-value.
    actions = [(i, j) for i in range(3) for j in range(3)]
    actions_per_state = {a: initial_q_value for a in actions}
    Q_values = {'terminal': actions_per_state.copy()}
    Q_values['terminal'][None] = 0.0
    first_turn_random_count = 0
    # Running Q-Learning Q-value updates for many episodes
    for i in range(rounds):
        if i % 10000 == 0:
            # Periodic progress report; counters restart each window.
            print("Rounds done = {}".format(i), end=" | ")
            print("Wins = {}".format(wins), end=" | ")
            print("Losses = {}".format(losses))
            wins = 0
            losses = 0
        ticTacToe = TicTacToe()
        # Randomly decide who opens the episode.
        first_turn = random.choice(['random', 'computer'])
        if first_turn == 'random':
            first_turn_random_count += 1
            # Random player playing one turn
            # Q opposition player playing one turn
            selected_grid = epsilon_greedy_for_opposition(
                Q_values, ticTacToe.get_current_state(), actions_per_state,
                epsilon)
            # Fall back to a random empty cell if the greedy pick is taken.
            if selected_grid not in ticTacToe.get_empty_cells():
                selected_grid = random.choice(ticTacToe.get_empty_cells())
            ticTacToe.set_one_grid(selected_grid[0], selected_grid[1])
            ticTacToe.toggle_turn()
        # One full episode of Q-value updates.
        Q_values = Q_Learning(Q_values, alpha, gamma, epsilon,
                              actions_per_state)
    # pprint(Q_values)
    policy = find_optimal_policy(Q_values)
    print("First turn by random players = {}%".format(
        first_turn_random_count * 100 / rounds))
    filename = "Q_values_{}_episodes_025_epsilon.p".format(rounds)
    pickle.dump(Q_values, open(filename, "wb"))
    filename = "policy_{}_episodes_025_epsilon.p".format(rounds)
    pickle.dump(policy, open(filename, "wb"))
async def start_new_match(self):
    """Create a new board, broadcast its id to all connected clients, and
    start the first turn.

    Fixes two problems with the original ``asyncio.wait`` call:
    - passing bare coroutines to ``asyncio.wait`` is deprecated and
      rejected since Python 3.11; ``asyncio.gather`` accepts them.
    - ``asyncio.wait`` raises ValueError on an empty set, so an empty
      ``self.connected`` no longer crashes the match start.
    """
    logger.info('Starting new match')
    game = TicTacToe(turn=self.random_piece())
    self.latest_id = self.latest_id + 1
    self.boards[self.latest_id] = {'game': game}
    data = {'board-started': {'board-id': self.latest_id}}
    payload = json.dumps(data)
    if self.connected:
        await asyncio.gather(*(ws.send(payload) for ws in self.connected))
    await self.start_turn(self.latest_id, game.turn)
class TestWinner(unittest.TestCase):
    """Tests for TicTacToe.winner() on each winning line type.

    NOTE(review): test_check_column_win assigns a 3x3 NESTED list while every
    other test uses a flat 9-element board, and its board is actually a
    filled top ROW -- verify against the board layout winner() expects.
    """

    def setUp(self):
        self.game = TicTacToe()

    def test_check_column_win(self):
        # NOTE(review): nested-list board, unlike the flat boards below.
        self.game.board = [['X', 'X', 'X'], [' ', 'O', ' '], [' ', ' ', ' ']]
        self.assertTrue(self.game.winner(6, 'X'))

    def test_check_row_win(self):
        # Top row 0..2 filled with 'X'.
        self.game.board = ['X', 'X', 'X', ' ', 'O', ' ', ' ', ' ', ' ']
        self.assertTrue(self.game.winner(0, 'X'))

    def test_check_first_diagonal_win(self):
        # Main diagonal 0, 4, 8 filled with 'X'.
        self.game.board = ['X', ' ', ' ', ' ', 'X', ' ', ' ', ' ', 'X']
        self.assertTrue(self.game.winner(2, 'X'))

    def test_check_second_diagonal_win(self):
        # Anti-diagonal 2, 4, 6 filled with 'X', but probed from square 5 --
        # NOTE(review): presumably winner(square, ...) only checks lines
        # through *square*, hence assertFalse; confirm against winner().
        self.game.board = [' ', ' ', 'X', ' ', 'X', ' ', 'X', ' ', ' ']
        self.assertFalse(self.game.winner(5, 'X'))
def main():
    """Alternate x and o moves until a player's move ends the game."""
    game = TicTacToe()
    finished = False
    while not finished:
        for mark in ('x', 'o'):
            print()
            print('{} player move...'.format(mark))
            # move_player returns truthy when the game is over.
            if controller.move_player(game, mark):
                finished = True
                break
class TicTac(QtWidgets.QDialog):
    """Qt dialog wiring a 3x3 grid of buttons to a TicTacToe engine."""

    def __init__(self, parent=None):
        QtWidgets.QWidget.__init__(self, parent)
        self.ui = Ui_Dialog()
        self.ui.setupUi(self)
        # Collect pb_1 .. pb_9 in board order.
        self.btns = [getattr(self.ui, 'pb_{}'.format(n)) for n in range(1, 10)]
        for i, btn in enumerate(self.btns):
            # Default argument pins the cell index to each handler.
            btn.clicked.connect(lambda _, b=i: self.pb(b))
        self.ui.pb_reset.clicked.connect(self.reset)
        self.tic = TicTacToe()

    def pb(self, n):
        """Handle a click on cell *n*: play the human move, then the engine reply."""
        if self.btns[n].text() != "":
            return  # cell already occupied
        self.tic = self.tic.move(n)
        if not self.update():
            return  # game just ended on the human move
        self.tic = self.tic.move(self.tic.best())
        self.update()

    def update(self):
        """Sync button labels with the board; announce and reset on game over.

        Returns False when the game just ended, True otherwise.
        """
        for i, btn in enumerate(self.btns):
            btn.setText(self.tic.board[i // 3][i % 3].strip())
        s = self.tic.check_game()
        if not s:
            return True
        QtWidgets.QMessageBox.about(self, "Game Over!", s)
        self.reset()
        return False

    def reset(self):
        """Start a fresh game and redraw the empty board."""
        self.tic = TicTacToe()
        self.update()
def generateExamples(p1, p2, numGames, ext="", save=True):
    """p1 plays p2 numGames times; game histories and results are returned.

    When save=True the (x, y) arrays are bz2-pickled to a cache file and its
    path is returned (an existing cache short-circuits generation); with
    save=False the raw (x, y) tuple is returned instead.
    """
    filename = os.path.join(
        "data", "u_games_{}k".format(numGames // 1000) + ext + ".pbz2")
    if os.path.isfile(filename):
        return filename
    features = []
    labels = []
    step = math.ceil(0.01 * numGames)  # progress roughly every 1%
    for game_idx in range(numGames):
        match = TicTacToe(p1, p2, verbose=False)
        outcome = match.play()
        features.append(match.board.gameHistory)
        # One label per recorded position of this game.
        labels.extend([outcome] * len(match.board.gameHistory))
        if game_idx % step == 0:
            progress(game_idx, numGames, status="Generating games")
    stacked_x = np.vstack(features)
    stacked_y = np.array(labels)
    if not save:
        return (stacked_x, stacked_y)
    with bz2.open(filename, "wb") as f:
        pickle.dump((stacked_x, stacked_y), f)
    return filename
def activate(self, inputs):
    """Score the nine cells: random positive weight for legal moves that do
    not sit on an opponent's almost-complete line; if that filters out every
    move, fall back to weighting any legal move."""
    game = TicTacToe._createFromState(inputs)
    winners = game._almostWinners()
    possibles = game.possibleMoves()
    moves = []
    for index in range(9):
        if index in possibles and winners[index] is None:
            moves.append(random.random())
        else:
            moves.append(0.0)
    if moves.count(0.0) == 9:
        # Every candidate was filtered -- allow any legal move instead.
        return [
            random.random() if index in possibles else 0.0
            for index in range(9)
        ]
    return moves
def __init__(self, player1=None, player2=None, master=None, engine=None):
    """Build the Tk game frame: pick/create the engine, load board images,
    size the window, and lay out the widgets.

    :param player1: forwarded to TicTacToe when no engine is supplied
    :param player2: forwarded to TicTacToe when no engine is supplied
    :param master: parent Tk widget -- assumed non-None; it is dereferenced
        below (TODO confirm callers always pass it)
    :param engine: pre-built game engine; overrides player1/player2
    """
    # Assign engine
    if engine is None:
        self.tc = TicTacToe(player1, player2)
    else:
        self.tc = engine
    # Open images for GUI (paths are relative to the working directory)
    self.bg_image = ImageTk.PhotoImage(
        Image.open('game/graphics/background.png'))
    self.x_image = ImageTk.PhotoImage(Image.open('game/graphics/x.png'))
    self.o_image = ImageTk.PhotoImage(Image.open('game/graphics/o.png'))
    self.empty_image = ImageTk.PhotoImage(
        Image.open('game/graphics/empty.png'))
    # Configure GUI: window is at least as large as the background image
    master.minsize(width=self.bg_image.width(),
                   height=self.bg_image.height())
    super().__init__(master)
    self.pack()
    # set background
    label1 = Label(master, image=self.bg_image)
    label1.pack()
    self.create_widgets()
def simulateGame(player, opponent):
    """Play one game between *player* and *opponent* and return the fitness
    delta (game.score(), or a penalty when *player* keeps picking illegal
    moves).

    IndexError from pickAndMakeMove is treated as "agent chose an illegal
    move": the player is given NUMBER_ILLEGAL_MOVES_ALLOWED retries before
    being penalised; the opponent is silently replaced by a RandomAgent move.
    """
    game = TicTacToe()
    illegal_moves = 0
    while not game.isFinished():
        # TODO track stats?
        if game.isOurTurn():
            # Player moves first this half-turn.
            try:
                pickAndMakeMove(game, player)
            except IndexError:
                illegal_moves += 1
                if illegal_moves >= NUMBER_ILLEGAL_MOVES_ALLOWED:
                    # Penalise player
                    return -NUMBER_TO_SAMPLE
                else:
                    continue  # retry the player's move
            if game.isFinished():
                break
            # Opponent replies; an illegal pick falls back to a random move.
            try:
                pickAndMakeMove(game, opponent)
            except IndexError:
                pickAndMakeMove(game, agents.RandomAgent())
        else:
            # Not our turn: opponent moves first this half-turn.
            try:
                pickAndMakeMove(game, opponent)
            except IndexError:
                pickAndMakeMove(game, agents.RandomAgent())
            if game.isFinished():
                break
            try:
                pickAndMakeMove(game, player)
            except IndexError:
                illegal_moves += 1
                if illegal_moves >= NUMBER_ILLEGAL_MOVES_ALLOWED:
                    # Penalise player
                    return -NUMBER_TO_SAMPLE
                else:
                    continue
    # Game is finished (or illegal move made)
    # TODO debug prints, or stats
    return game.score()
class TestGame(unittest.TestCase):
    """Board-state tests plus an exhaustive winner() sweep."""

    def setUp(self):
        self.game = TicTacToe()
        self.game.board[4] = "X"  # pre-occupy the centre square

    def test_available_moves(self):
        # The occupied centre must not be offered as a move.
        self.assertNotIn(4, self.game.available_moves())

    def test_make_move(self):
        # A taken square is rejected; a free one is accepted and recorded.
        self.assertFalse(self.game.make_move(4, "X"))
        self.assertTrue(self.game.make_move(5, "X"))
        self.assertNotEqual(self.game.board[5], " ")

    def test_winner(self):
        """Exhaustively check winner() for every row, column and diagonal.

        For each winning line only that line is filled (the rest of the
        board stays empty), winner() is probed from every cell of the
        line, and both marks ("X" and "O") are exercised.
        """

        def check_rows(letter):
            # Fill each row in turn; probe winner() from every cell index.
            for probe in range(3):
                for row in range(3):
                    self.game.board = [" "] * 9
                    self.game.board[row * 3:(row + 1) * 3] = [letter] * 3
                    self.assertTrue(
                        self.game.winner(probe + row * 3, letter))

        def check_columns(letter):
            # Fill each column in turn; probe winner() from every cell index.
            for col in range(3):
                for probe in range(3):
                    self.game.board = [" "] * 9
                    for r in range(3):
                        self.game.board[col + 3 * r] = letter
                    self.assertTrue(
                        self.game.winner(probe * 3 + col, letter))

        def check_diagonals(letter):
            for diagonal in ([0, 4, 8], [2, 4, 6]):
                for probe in diagonal:
                    self.game.board = [" "] * 9
                    for cell in diagonal:
                        self.game.board[cell] = letter
                    self.assertTrue(self.game.winner(probe, letter))

        for letter in ("X", "O"):
            check_rows(letter)
            check_columns(letter)
            check_diagonals(letter)
def games_are_equivalent():
    """Fuzz-test that TicTacToe and BitTicTacToe behave identically.

    Plays 10000 random games, mirroring every move on both engines, and
    raises on the first divergence in legal moves, game-over status, or
    winner. Improvement over the original: each bare ``raise Exception()``
    now carries a diagnostic message identifying what diverged.
    """
    a = TicTacToe()
    b = BitTicTacToe()
    for i in range(10000):
        while True:
            ma = a.available_moves()
            mb = b.available_moves()
            if sorted(ma) != sorted(mb):
                raise Exception(
                    'game {}: available moves differ: {} vs {}'.format(
                        i, ma, mb))
            m = ma[randint(0, len(ma) - 1)]
            a.move(m)
            b.move(m)
            if a.is_done() != b.is_done():
                raise Exception(
                    'game {}: is_done mismatch after move {}'.format(i, m))
            # Winners must map to the same symbolic outcome on both engines.
            same_winner = (
                (a.winner == TicTacToe.X and b.winner == BitTicTacToe.X)
                or (a.winner == TicTacToe.O and b.winner == BitTicTacToe.O)
                or (a.winner == TicTacToe.Empty
                    and b.winner == BitTicTacToe.Empty))
            if not same_winner:
                raise Exception(
                    'game {}: winner mismatch: {} vs {}'.format(
                        i, a.winner, b.winner))
            if a.is_done():
                break
        a.clear()
        b.clear()
def human_against_agent(player_num, filename):
    """Repeatedly play a human (seat *player_num*) against a trained agent
    loaded from *filename*, until the human declines a rematch."""
    while True:
        # Assign players according to which seat the human takes.
        if player_num == 1:
            player_1 = Human(1)
            player_2 = Agent(2, -1, 0.0, .1, 0.9, True, filename)
        else:
            player_1 = Agent(1, -1, 0.0, .1, 0.9, True, filename)
            player_2 = Human(2)
        winner, board_state_hash = TicTacToe.play(player_1, player_2)
        if player_num == 2:
            # then player 1 must be the agent
            print("Thanks for the training data silly human!")
            player_1.dump_policy_to_csv('p1_policy.csv')
        action = input('Play Again? (y or n)')
        if action != 'y':
            return
def start_session(self) -> None:
    """
    Starts session of the game: shows the menu in a loop and dispatches
    on the user's choice until exit (4) is selected.
    :return: None
    """
    logging.info('Session started...')
    while True:
        choice = self.menu.show_menu()
        if choice == 4:
            break  # exit the session
        if choice == 3:
            self.load_history()
        elif choice == 2:
            # Human vs bot: pick sides first.
            side1, side2 = TicTacToe.sides()
            human, bot = self.init_human_bot(side1, side2)
            self.start_game(human, bot)
        else:
            # Default: two humans.
            first, second = self.init_humans()
            self.start_game(first, second)
    logging.info('Exiting from the game...\n\n')
def setup_game():
    """Interactive setup: choose opponent type, names and letters, then play.

    Prompts until a valid Y/N and X/O answer is given; player 2 is a
    Computer when requested, otherwise a second Human.
    """
    # determine versus human or versus computer
    play_against_comp = ''
    while play_against_comp not in ('Y', 'N'):  # was: ``not x in [...]``
        play_against_comp = input(
            '\nDo you want to play against the computer? (Y/N) ').upper()
    computer = play_against_comp == 'Y'  # was: ``True if ... else False``
    # choose letters
    letter1 = ''
    name1 = input('Player 1, what shall we call you? ')
    name2 = ''
    if not computer:
        name2 = input('Player 2, What shall we call you? ')
    while letter1 not in ('X', 'O'):
        letter1 = input(
            f'{name1}, which letter do you want to be, X or O?: ').upper()
    letter2 = 'X' if letter1 == 'O' else 'O'
    # init game
    player1 = Human(letter1, name1)
    player2 = Computer(letter2) if computer else Human(letter2, name2)
    play_game(player1, player2, TicTacToe())
from nn import NeuralNetwork from helpers import get_best_legal_move from tkinter import * from functools import partial from game import TicTacToe from tkinter import filedialog master = Tk() buttons = [] who_is_first_buttons = [] replay_btn = None game = TicTacToe() nn = None X = PhotoImage(file='X.gif') O = PhotoImage(file='O.gif') Empty = PhotoImage(file='Empty.png') def who_is_first(val): for b in who_is_first_buttons: b.grid_remove() who_is_first_buttons.clear() init_buttons() if val: bot_move() def ask_who_is_first(): you_first = Button(master,
class MCTSPlayer(Player):
    """Monte-Carlo Tree Search player.

    Tree nodes are encoded as move strings rooted at ">" (e.g. ">405"
    means moves 4, 0, 5 were played from the start position). Node data is
    a two-element list [total score, visit count].
    """

    # Sentinel prefix naming the root (start-of-game) node.
    rootnode = ">"

    def __init__(self, ep=np.sqrt(2), numplayouts=20, movetime=45):
        # ep: UCT exploration constant; numplayouts: playouts per leaf;
        # movetime: seconds allotted per move.
        self.gt = GameTree()
        self.ep = ep
        self.numpl = numplayouts
        self.movet = movetime
        # Internal self-play game used to run random playouts.
        self.p1 = SophisticatedRandomPlayer()
        self.game = TicTacToe(self.p1, self.p1, verbose=False)

    def startGame(self):
        """Reset the game tree for a fresh game."""
        print("MCTSPlayer starts game")
        self.gt = GameTree()
        # define root node (i.e. current game state)
        self.gt.add_node(node=MCTSPlayer.rootnode, data=[0, 0], parent=None)

    def move(self, board):
        """Run MCTS for up to self.movet seconds and return the best move.

        Returns the move digit of the most-visited child of the current
        position's node.
        """
        print("MCTSPlayer thinks...")
        nodecnt = 0
        t0 = time.time()
        # Locate (or synthesise) the tree node matching the current board.
        rtnode, alrdy_in = self.board_already_in_gt(board)
        # node not in; if node in, no need to do anything
        if not alrdy_in:
            # need to check if any parents are in gt.
            rtnode_would_be_parents = self.gt.get_would_be_parents_to_root(
                rtnode)
            # Deepest ancestor already present in the tree.
            last_parent_in_gt = max(list(
                filter(lambda x: self.gt.node_in_tree(x),
                       rtnode_would_be_parents)),
                key=lambda x: len(x))
            # Add the missing chain of nodes from that ancestor down to rtnode.
            last_added_parent = last_parent_in_gt
            for m in rtnode.replace(last_parent_in_gt, ""):
                self.gt.add_node(m, [0, 0], last_added_parent)
                last_added_parent = last_added_parent + m
        # which player are we? (side-to-move flag lives in state[18])
        player = board.state[18]
        while True:
            nodecnt += 1
            # Selection: walk the tree by UCT to a leaf.
            chosennode = self.choose_next_node(rtnode, board)
            if time.time() - t0 > self.movet:
                break  # time budget exhausted
            # Replay the node's move string on a copy of the board.
            nodeboard = Board(board.state.copy())
            for m in chosennode.replace(rtnode, ""):
                nodeboard.pushMove(int(m))
            # Expansion: add children of current node
            for m in nodeboard.legalMoves():
                self.gt.add_node(str(m), data=[0, 0], parent=chosennode)
            # Simulation.
            score = self.playout(nodeboard, player)
            # Backpropagation: update score & numVisits up to the root.
            for node in self.gt.get_parents_to_root(chosennode):
                data = self.gt.get_data(node)
                data[0] += score
                data[1] += 1
                self.gt.update_data(node, data)
        # Pick the most-visited child (robust-child criterion).
        best_move = None
        best_numVisits = 0
        for m in self.gt.get_children(rtnode):
            data = self.gt.get_data(m)
            if data[1] > best_numVisits:
                best_numVisits = data[1]
                best_move = m
        print(
            "MCTSPLayer explored {0} nodes in {1} seconds at {2:.2f} nodes/s".
            format(nodecnt, self.movet, nodecnt / self.movet))
        # Last character of the node string is the move digit.
        return int(best_move[-1])

    def board_already_in_gt(self, board):
        # returns most visited node if the board position input is already
        # in the gt, and whether the node is in game tree
        num_moves = np.sum(board.state[:18])
        # Candidate nodes must have exactly one character per move (+ root).
        possibilities = filter(lambda x: len(x) == num_moves + 1,
                               self.gt.get_all_nodes())
        equivalents = []
        for p in possibilities:
            # Replay each candidate and compare resulting states.
            b = Board()
            for m in p[1:]:
                b.pushMove(int(m))
            if np.array_equal(b.state, board.state):
                equivalents.append(p)
        if len(equivalents) == 0:
            # Not in the tree: synthesise a plausible move string by
            # interleaving the two players' occupied squares.
            bdarray = board.state.copy()[:-1]
            where1 = list(np.where(bdarray[:9] == 1)[0])
            where2 = list(np.where(bdarray[9:] == 1)[0])
            node = []
            for i in range(min(len(where1), len(where2))):
                print("I:", i)
                node.append(str(where1[i]))
                node.append(str(where2[i]))
            if len(where1) > len(where2):
                node.append(str(where1[-1]))
            node = "".join(node)
            node = MCTSPlayer.rootnode + node.strip()
            return node, False
        else:
            # Prefer the equivalent node with the most visits.
            return max(equivalents,
                       key=lambda x: self.gt.get_data(x)[1]), True

    def playout(self, board, player):
        """Average result of self.numpl random playouts from *board*,
        scored from *player*'s perspective (+1 win, -1 loss, 0 draw)."""
        gameover, winner = board.isGameOver()
        if gameover:
            if winner is None:
                return 0
            elif winner == player:
                return 1
            else:
                return -1
        scr = 0
        for _ in range(self.numpl):
            self.game.board = Board(board.state.copy())
            # (1 - 2*player) flips the sign when we are the second player.
            scr += self.game.play() * (1 - 2 * player)
        return scr / self.numpl

    def choose_next_level_node(self, currnode, board):
        # returns node, Terminal (bool)
        children = self.gt.get_children(currnode)
        if len(children) == 0:
            return currnode, True
        childUCTs = list(
            map(lambda cnode: self.calcUCT(currnode, cnode), children))
        bestUCT = -1e6
        bestchild = "-------------"  # placeholder; replaced on first finite UCT
        for index in range(len(childUCTs)):
            terminal = self.is_node_terminal(children[index], board)
            if childUCTs[index] is None:
                # Unvisited child: explore it immediately.
                return children[index], terminal
            elif childUCTs[index] > bestUCT:
                bestUCT = childUCTs[index]
                bestchild = children[index]
        return bestchild, terminal

    def choose_next_node(self, currnode, board):
        """Descend from *currnode* by UCT until a terminal/unvisited node."""
        parent = currnode
        # Bounded at 9 levels (max moves in TicTacToe); equivalent to
        # ``while True`` here.
        for _ in range(9):
            node, terminal = self.choose_next_level_node(parent, board)
            if terminal:
                return node
            else:
                parent = node

    def is_node_terminal(self, node, rootboard):
        # returns terminal at current node: game over, or never visited yet
        bd = Board(rootboard.state.copy())
        for m in node[1:]:
            bd.pushMove(int(m))
        gameover, _ = bd.isGameOver()
        if gameover:
            return True
        elif self.gt.get_data(node)[1] == 0:
            return True
        else:
            return False

    def calcUCT(self, parentnode, childnode):
        """UCT value of *childnode*, or None when it has no visits yet."""
        if self.gt.get_data(childnode)[1] == 0:
            return None
        return self.gt.get_data(childnode)[0] / self.gt.get_data(
            childnode)[1] + self.ep * np.sqrt(
                np.log(self.gt.get_data(parentnode)[1]) /
                self.gt.get_data(childnode)[1])