class TestWinner(unittest.TestCase):
    """Exercises ``TicTacToe.winner`` against hand-built flat 9-cell boards.

    Cells hold "X", "0" (the digit zero is used as the second mark here) or
    " " for an empty square.
    """

    def setUp(self):
        # A fresh game per test so board assignments never leak between cases.
        self.game_unittest = TicTacToe()

    def test_row_unittest(self):
        """``winner(2, "0")`` is expected to be truthy on this board."""
        print("Checking row")
        self.game_unittest.board = [
            "X", "0", "X", " ", "0", "0", "X", "0", "X"
        ]
        self.assertTrue(self.game_unittest.winner(2, "0"))
        print(self.game_unittest.board)

    def test_column_unittest(self):
        """``winner(5, "0")`` is expected to be truthy on this board."""
        print("Checking column")
        self.game_unittest.board = [
            "X", "0", "X", " ", "0", "0", "0", "0", "X"
        ]
        self.assertTrue(self.game_unittest.winner(5, "0"))
        print(self.game_unittest.board)

    def test_diagonal_unittest(self):
        """``winner(8, "X")`` is expected to be truthy on this board."""
        print("Checking diagonal")
        self.game_unittest.board = [
            "X", "0", "X", " ", "X", "0", "0", "0", "X"
        ]
        self.assertTrue(self.game_unittest.winner(8, "X"))
        print(self.game_unittest.board)

    def test_other_conditions(self):
        """A board with no completed line must not report a win.

        Renamed with the ``test`` prefix so unittest actually collects it, and
        the board literal now has its missing commas: previously adjacent
        string literals were concatenated ("X" "0" -> "X0"), which produced a
        7-cell board instead of 9.
        """
        print("None of checks are true")
        self.game_unittest.board = [
            "X", "0", "X", "0", "X", "0", "0", "X", " "
        ]
        self.assertFalse(self.game_unittest.winner(1, "X"))
Beispiel #2
0
 def chose_action(self, board_state_hash):
     """Show the board and ask the human for a move until something is typed.

     The board is rebuilt from ``board_state_hash`` once and re-printed before
     every prompt. An empty reply repeats the prompt; any other reply is split
     on "," and its first two fields are converted to ints.

     Returns:
         A 2-tuple of ints parsed from the "x,y" reply.
     """
     board = State.hash_to_state(board_state_hash)
     while True:
         TicTacToe.print_board(board)
         reply = input('Your move? i.e. x,y : ')
         if reply != '':
             break
     fields = reply.split(',')
     return (int(fields[0]), int(fields[1]))
Beispiel #3
0
def main():
    """Start a TicTacToe game and keep taking turns until it reports over."""
    session = TicTacToe()
    session.start()

    while True:
        if session.over:
            return
        session.turn()
Beispiel #4
0
 def __init__(self, ep=np.sqrt(2), numplayouts=20, movetime=45):
     """Set up the search tree, tuning values and the internal playout game.

     Args:
         ep: stored as ``self.ep`` (defaults to sqrt(2)).
         numplayouts: stored as ``self.numpl``.
         movetime: stored as ``self.movet``.
     """
     self.gt = GameTree()
     self.ep, self.numpl, self.movet = ep, numplayouts, movetime
     # One random player instance fills both seats of the internal game.
     rollout_player = SophisticatedRandomPlayer()
     self.p1 = rollout_player
     self.game = TicTacToe(rollout_player, rollout_player, verbose=False)
Beispiel #5
0
 def end_of_episode(self, winner, board_state_hash):
     """Print the final board followed by a one-line result message.

     Args:
         winner: -1 is reported as a draw; any other value is printed as the
             winning player.
         board_state_hash: hash that ``State.hash_to_state`` turns back into
             the board to display.
     """
     TicTacToe.print_board(State.hash_to_state(board_state_hash))
     is_draw = winner == -1
     if is_draw:
         message = 'Game over! It was a draw.'
     else:
         message = 'Game over! Winner: Player {0}'.format(winner)
     print(message)
Beispiel #6
0
class Test3(unittest.TestCase):
    """Plays one fully configured game in ``setUp`` and checks how it ended."""

    def setUp(self):
        print('Starting a new test ...')
        # Configuration tuple is passed through verbatim to the game.
        settings = ('C', 'C', "X", "O", 1, 0, 'ComputerVComputer')
        self.game1 = TicTacToe(configparam=settings)
        self.game1.start_game()

    def test_game_1_0(self):
        """The finished game must be in either the "win" or the "tie" state."""
        final_state = self.game1.board.game_state
        self.assertTrue(final_state in ["win", "tie"])
Beispiel #7
0
def train_agent(iterations):
  """Train one agent by self-play and log results to ``results.csv``.

  The same ``Agent`` object plays both seats (``p2`` is ``p1``); the seat
  order alternates every game. After every 100 games a row with the
  win/win/draw tallies for that batch is written and the tallies are reset.
  When training ends -- normally, on an error, or on Ctrl-C -- the policy is
  dumped to CSV.

  Args:
    iterations: number of games to play.
  """
  # Built before the ``try`` so the handlers below can always reach ``p1``;
  # previously a failure in Agent() led to a second error on the unbound name.
  p1 = Agent(
    1,  # Player #
    -10,  # Loss Value
    0.2,  # Epsilon (exploration rate)
    0.2,  # Alpha (learning rate)
    0.9,  # Gamma (make infinite sum finite) - https://stats.stackexchange.com/questions/221402/understanding-the-role-of-the-discount-factor-in-reinforcement-learning
    True,  # Update learner
  )
  p2 = p1
  # Alternative opponent: Agent(2, -1, 1, 0.1, 0.9, False) acts as a random agent.

  series = ['P1_Wins', 'P2_Wins', 'Draw']

  try:
    # The context manager guarantees the CSV is flushed and closed on every
    # exit path; newline='' is what the csv module expects for writer targets.
    with open('results.csv', 'w', newline='') as f:
      writer = csv.writer(f)
      writer.writerow(series)

      p1_wins = 0
      p2_wins = 0
      draw = 0

      for i in trange(iterations):
        game_number = i + 1  # 1-based, drives seat swapping and batching

        if game_number % 2 == 0:
          winner, board_state_hash = TicTacToe.play(p1, p2)
        else:
          winner, board_state_hash = TicTacToe.play(p2, p1)

        if winner == -1:
          draw += 1
        elif winner == 1:
          p1_wins += 1
        elif winner == 2:
          p2_wins += 1

        if game_number % 100 == 0:
          writer.writerow([p1_wins, p2_wins, draw])
          p1_wins = 0
          p2_wins = 0
          draw = 0

    print('Saving policy')
    p1.dump_policy_to_csv('p1_policy.csv')
    p2.dump_policy_to_csv('p2_policy.csv')
  except Exception as e:
    # Best effort: report the failure but keep whatever was learned so far.
    print(str(e))
    print("Program crashed - saving policy...")
    p1.dump_policy_to_csv('p1_policy.csv')
  except KeyboardInterrupt:
    print("Program crashed - saving policy...")
    p1.dump_policy_to_csv('p1_policy.csv')
    p2.dump_policy_to_csv('p2_policy.csv')
Beispiel #8
0
 def playHalf(p1, p2, numG, games):
     """Play ``numG`` games of ``p1`` against ``p2`` and tally the outcomes.

     Progress is reported against the enclosing scope's ``numGames`` total,
     offset by ``games`` (the number of games already accounted for).

     Returns:
         A length-3 numpy array; each game increments the slot at
         ``winner + 1``.
     """
     match = TicTacToe(p1, p2, verbose=self.VERBOSE)
     tally = np.zeros(3)
     for played in range(numG):
         # Report roughly every 5% of the overall game count.
         report_due = played % math.ceil(0.05 * numGames) == 0
         if report_due:
             progress(games + played, numGames, status="Playing Games")
         print()
         outcome = match.play()
         match.reset()
         tally[outcome + 1] += 1
     return tally
Beispiel #9
0
class TestGame(unittest.TestCase):
    """Move bookkeeping tests on a board whose centre square is pre-filled."""

    def setUp(self):
        game = TicTacToe()
        game.board[4] = "X"  # occupy the centre before every test
        self.game = game

    def test_available_moves(self):
        """The occupied centre square must not be offered as a move."""
        free_squares = self.game.available_moves()
        self.assertTrue(4 not in free_squares)

    def test_make_move(self):
        """Moving onto square 4 is refused; moving onto square 5 succeeds."""
        refused = self.game.make_move(4, "X")
        self.assertFalse(refused)
        accepted = self.game.make_move(5, "X")
        self.assertTrue(accepted)
        self.assertTrue(self.game.board[5] != " ")
Beispiel #10
0
    def __init__(self, parent=None):
        """Build the dialog UI, wire the nine cell buttons and start a game.

        Args:
            parent: optional parent widget handed to the Qt base initialiser.
        """
        QtWidgets.QWidget.__init__(self, parent)
        self.ui = Ui_Dialog()
        self.ui.setupUi(self)
        # Collect ui.pb_1 .. ui.pb_9 in board order.
        self.btns = [getattr(self.ui, "pb_%d" % k) for k in range(1, 10)]

        for position, button in enumerate(self.btns):
            # Default argument freezes the index for this particular button.
            button.clicked.connect(lambda _, b=position: self.pb(b))
        self.ui.pb_reset.clicked.connect(self.reset)
        self.tic = TicTacToe()
 def start_game(self, player1: Human, player2: Union[Human, Bot]) -> None:
     """
     Create the game for the requested mode and replay it until told to stop.

     A ``Human`` second player gives a two-human game; anything else is
     passed to the game as its bot. ``game_loop`` is then called repeatedly
     until it returns ``'n'``.

     :param player1: first player
     :param player2: second player, a human or a bot
     :return: None
     """
     versus_human = isinstance(player2, Human)
     if not versus_human:
         self.game = TicTacToe(player1=player1, bot=player2)
     else:
         self.game = TicTacToe(player1=player1, player2=player2)
     # Always runs at least one round, then repeats while the answer is not 'n'.
     while self.game_loop(player1, player2) != 'n':
         pass
Beispiel #12
0
    def iterate(self,
                game: TicTacToe,
                train: bool = False,
                print_q: bool = False):
        """Pick one move for the current position and play it on ``game``.

        A random legal move is always drawn first. When ``self.ai`` is set it
        is replaced by ``get_optimal_move`` and the (hash, move) pair is put
        at the front of ``self.history``.

        Args:
            game: the game to move in.
            train: forwarded to ``get_optimal_move``.
            print_q: forwarded to ``get_optimal_move``.
        """
        legal = game.get_legal_moves()
        chosen = self.random_move(legal)
        board_key = game.get_hash()

        if not self.ai:
            game.input(int(chosen))
            return

        chosen = self.get_optimal_move(legal, board_key, print_q, train)
        record = {"hash": board_key, "move": str(chosen)}
        # Newest entry first; a new list is built rather than mutated in place.
        self.history = [record] + self.history
        game.input(int(chosen))
Beispiel #13
0
 def activate(self, inputs):
     """Score the nine cells: a random value if playable, otherwise 0.0.

     Args:
         inputs: board state accepted by ``TicTacToe._createFromState``.

     Returns:
         A list of nine floats indexed by cell.
     """
     game = TicTacToe._createFromState(inputs)
     open_cells = game.possibleMoves()
     scores = []
     for cell in range(9):
         scores.append(random.random() if cell in open_cells else 0.0)
     return scores
Beispiel #14
0
def main():
    """Run the pygame front end: alternate the two players until someone quits.

    While ``tictactoe.winner()`` returns -2 the side to move acts: a ``USER``
    player moves by mouse click, any other player is asked for a move via
    ``getmove``. Once ``winner()`` returns anything else, only the window's
    quit event is handled. The display is refreshed at 30 ticks per second.

    Raises:
        ValueError: if a non-user player returns a move the board rejects.
    """
    pygame.init()
    clock = pygame.time.Clock()

    # Game model and its on-screen board.
    tictactoe = TicTacToe()
    board = Board(tictactoe, **BOARD_ARGS)

    # Each player is a (name, strategy instance) pair.
    player0 = (PLAYER0, strategy.load_player(PLAYER0)(tictactoe))
    player1 = (PLAYER1, strategy.load_player(PLAYER1)(tictactoe))

    mover, waiter = player0, player1

    pygame.display.update()

    while True:
        if tictactoe.winner() != -2:
            # Nothing left to play: just wait for the window to be closed.
            for event in pygame.event.get():
                if event.type == QUIT:
                    pygame.quit()
                    sys.exit()
        elif mover[0] == USER:
            for event in pygame.event.get():
                if event.type == QUIT:
                    pygame.quit()
                    sys.exit()
                elif event.type == MOUSEBUTTONUP:
                    x, y = event.pos
                    # Only an accepted click hands the turn over.
                    if board.process_click(x, y):
                        mover, waiter = waiter, mover
        else:
            pos = mover[1].getmove(tictactoe.copy())
            if not board.play_turn(pos):
                raise ValueError("{}: \nInvalid move:{}".format(
                    mover[0], str(pos)))
            mover, waiter = waiter, mover

        pygame.display.update()
        clock.tick(30)
Beispiel #15
0
def main(rounds):
    """Run ``rounds`` Q-learning episodes, then pickle the Q-table and policy.

    Every episode starts a new global ``ticTacToe`` game. With probability
    one half an opening move is played first via
    ``epsilon_greedy_for_opposition`` (falling back to a random empty cell if
    the proposal is not empty), then ``Q_Learning`` updates the Q-values.
    Every 10000 episodes the global ``wins``/``losses`` counters are printed
    and reset.

    Args:
        rounds: number of episodes to run.
    """
    global ticTacToe
    global wins
    global losses

    initial_q_value = 0.0
    alpha = 0.5  # Step size
    gamma = 1.0  # Discount factor
    epsilon = 0.2  # Exploration rate

    actions = [(i, j) for i in range(3) for j in range(3)]
    actions_per_state = {a: initial_q_value for a in actions}

    Q_values = {'terminal': actions_per_state.copy()}
    Q_values['terminal'][None] = 0.0

    first_turn_random_count = 0
    # Running Q-Learning Q-value updates for many episodes
    for i in range(rounds):
        if i % 10000 == 0:
            print("Rounds done = {}".format(i), end=" | ")
            print("Wins = {}".format(wins), end=" | ")
            print("Losses = {}".format(losses))
            wins = 0
            losses = 0
        ticTacToe = TicTacToe()

        first_turn = random.choice(['random', 'computer'])
        if first_turn == 'random':
            first_turn_random_count += 1
            # The opposing side opens: its move comes from the epsilon-greedy
            # opposition policy, replaced by a random empty cell whenever the
            # proposed cell is not actually empty.
            selected_grid = epsilon_greedy_for_opposition(
                Q_values, ticTacToe.get_current_state(), actions_per_state,
                epsilon)
            if selected_grid not in ticTacToe.get_empty_cells():
                selected_grid = random.choice(ticTacToe.get_empty_cells())
            ticTacToe.set_one_grid(selected_grid[0], selected_grid[1])
            ticTacToe.toggle_turn()

        Q_values = Q_Learning(Q_values, alpha, gamma, epsilon,
                              actions_per_state)

    policy = find_optimal_policy(Q_values)
    # Guard the percentage so a zero-episode run still reaches the save step.
    random_first_share = (first_turn_random_count * 100 / rounds
                          if rounds else 0.0)
    print("First turn by random players = {}%".format(random_first_share))

    # NOTE(review): the file names say "025_epsilon" while epsilon above is
    # 0.2 -- left unchanged because other code may load these exact names.
    # Context managers make sure both pickles are flushed and closed.
    filename = "Q_values_{}_episodes_025_epsilon.p".format(rounds)
    with open(filename, "wb") as out:
        pickle.dump(Q_values, out)
    filename = "policy_{}_episodes_025_epsilon.p".format(rounds)
    with open(filename, "wb") as out:
        pickle.dump(policy, out)
Beispiel #16
0
    async def start_new_match(self):
        """Create a new board, announce it to every client and start turn one.

        The board is stored in ``self.boards`` under a freshly incremented
        ``self.latest_id``; a ``board-started`` message carrying that id is
        sent to every websocket in ``self.connected`` before the first turn
        is started.
        """
        logger.info('Starting new match')

        game = TicTacToe(turn=self.random_piece())
        self.latest_id = self.latest_id + 1
        self.boards[self.latest_id] = {'game': game}

        # Serialise once; every client receives the same payload.
        payload = json.dumps({'board-started': {'board-id': self.latest_id}})
        # asyncio.wait() no longer accepts bare coroutines (Python 3.11+) and
        # raises on an empty collection; gather() handles both. Failures are
        # collected instead of raised so one broken client cannot stop the
        # match from starting.
        outcomes = await asyncio.gather(
            *(ws.send(payload) for ws in self.connected),
            return_exceptions=True)
        for outcome in outcomes:
            if isinstance(outcome, Exception):
                logger.warning('Could not notify a client: %r', outcome)
        await self.start_turn(self.latest_id, game.turn)
Beispiel #17
0
class TestWinner(unittest.TestCase):
    """Checks ``TicTacToe.winner(square, letter)`` on flat 9-cell boards."""

    def setUp(self):
        self.game = TicTacToe()

    def test_check_column_win(self):
        """Three X's down the first column, queried from square 6.

        The board used to be a nested 3x3 list showing a *row* of X's, unlike
        the flat 9-cell boards in every other test here; it is now a flat
        board holding an actual column win that contains the queried square.
        """
        self.game.board = ['X', ' ', ' ', 'X', 'O', ' ', 'X', ' ', ' ']
        self.assertTrue(self.game.winner(6, 'X'))

    def test_check_row_win(self):
        """Three X's across the top row, queried from square 0."""
        self.game.board = ['X', 'X', 'X', ' ', 'O', ' ', ' ', ' ', ' ']
        self.assertTrue(self.game.winner(0, 'X'))

    def test_check_first_diagonal_win(self):
        """X on squares 0, 4 and 8; the query for square 2 must be truthy."""
        self.game.board = ['X', ' ', ' ', ' ', 'X', ' ', ' ', ' ', 'X']
        self.assertTrue(self.game.winner(2, 'X'))

    def test_check_second_diagonal_win(self):
        """X on squares 2, 4 and 6; the query for square 5 must be falsy."""
        self.game.board = [' ', ' ', 'X', ' ', 'X', ' ', 'X', ' ', ' ']
        self.assertFalse(self.game.winner(5, 'X'))
Beispiel #18
0
def main():
    """Create a game, start it, and play turns until ``game.over`` is set."""
    tictactoe = TicTacToe()
    tictactoe.start()

    finished = tictactoe.over
    while not finished:
        tictactoe.turn()
        finished = tictactoe.over
Beispiel #19
0
def main():
    """Alternate x and o moves until ``controller.move_player`` says stop.

    Each turn prints a blank line and an announcement, then delegates the
    move to the controller; a truthy return value ends the game.
    """
    tictactoe = TicTacToe()
    turn_order = (
        ('x', 'x player move...'),
        ('o', 'o player move...'),
    )

    while True:
        for mark, announcement in turn_order:
            print()
            print(announcement)
            if controller.move_player(tictactoe, mark):
                return
Beispiel #20
0
class TicTac(QtWidgets.QDialog):
    """Dialog with nine cell buttons playing tic-tac-toe against the engine.

    ``self.tic`` always holds the current game object; ``move`` returns the
    next one, so it is re-assigned after every move.
    """

    def __init__(self, parent=None):
        """Build the UI, connect the cell and reset buttons, start a game."""
        QtWidgets.QWidget.__init__(self, parent)
        self.ui = Ui_Dialog()
        self.ui.setupUi(self)
        # Collect ui.pb_1 .. ui.pb_9 in board order.
        self.btns = [getattr(self.ui, "pb_%d" % k) for k in range(1, 10)]

        for slot, button in enumerate(self.btns):
            # Default argument freezes the index for this particular button.
            button.clicked.connect(lambda _, b=slot: self.pb(b))
        self.ui.pb_reset.clicked.connect(self.reset)
        self.tic = TicTacToe()

    def pb(self, n):
        """Handle a click on cell ``n``: play it, then let the engine reply."""
        if self.btns[n].text() != "":
            return  # cell already taken
        self.tic = self.tic.move(n)
        if not self.update():
            return  # game ended on the user's move
        reply = self.tic.best()
        self.tic = self.tic.move(reply)
        self.update()

    def update(self):
        """Redraw all cells and report the end of the game, if any.

        Returns:
            True while the game continues; False after a result was shown
            and the board was reset.
        """
        for cell, button in enumerate(self.btns):
            row, col = divmod(cell, 3)
            button.setText(self.tic.board[row][col].strip())
        verdict = self.tic.check_game()
        if not verdict:
            return True
        QtWidgets.QMessageBox.about(self, "Game Over!", verdict)
        self.reset()
        return False

    def reset(self):
        """Throw away the current game and redraw an empty board."""
        self.tic = TicTacToe()
        self.update()
Beispiel #21
0
def generateExamples(p1, p2, numGames, ext="", save=True):
    """Play ``p1`` against ``p2`` ``numGames`` times and collect training data.

    The target file is ``data/u_games_<numGames // 1000>k<ext>.pbz2``. If it
    already exists its path is returned immediately and nothing is played.

    Args:
        p1, p2: the two players handed to ``TicTacToe``.
        numGames: number of games to play.
        ext: extra text inserted before the file extension.
        save: when true, write the arrays to the target file.

    Returns:
        The file path when the file already existed or was just written,
        otherwise the tuple ``(x_t, y_t)``: the stacked game histories and
        one result label per history row.
    """
    basename = "u_games_{}k".format(numGames // 1000) + ext + ".pbz2"
    filename = os.path.join("data", basename)
    if os.path.isfile(filename):
        return filename

    histories, labels = [], []
    for game_index in range(numGames):
        match = TicTacToe(p1, p2, verbose=False)
        outcome = match.play()
        positions = match.board.gameHistory
        histories.append(positions)
        # Every recorded position of the game gets that game's result.
        labels.extend([outcome] * len(positions))
        if game_index % math.ceil(0.01 * numGames) == 0:
            progress(game_index, numGames, status="Generating games")

    x_t = np.vstack(histories)
    y_t = np.array(labels)
    if not save:
        return (x_t, y_t)
    with bz2.open(filename, "wb") as f:
        pickle.dump((x_t, y_t), f)
    return filename
Beispiel #22
0
 def activate(self, inputs):
     """Score the nine cells, preferring cells with no ``_almostWinners`` entry.

     A playable cell whose ``_almostWinners()`` entry is None gets a random
     score; every other cell gets 0.0. If that leaves all nine scores at 0.0,
     every playable cell gets a random score instead.

     Args:
         inputs: board state accepted by ``TicTacToe._createFromState``.

     Returns:
         A list of nine floats indexed by cell.
     """
     game = TicTacToe._createFromState(inputs)
     winners = game._almostWinners()
     possibles = game.possibleMoves()

     preferred = []
     for cell in range(9):
         if cell in possibles and winners[cell] is None:
             preferred.append(random.random())
         else:
             preferred.append(0.0)
     if preferred.count(0.0) != 9:
         return preferred

     # Nothing qualified: fall back to any playable cell.
     fallback = []
     for cell in range(9):
         fallback.append(random.random() if cell in possibles else 0.0)
     return fallback
Beispiel #23
0
    def __init__(self, player1=None, player2=None, master=None, engine=None):
        """Attach a game engine, load the artwork and build the window.

        Args:
            player1, player2: used to create a new ``TicTacToe`` engine when
                ``engine`` is not supplied.
            master: the parent Tk widget; its minimum size is set to the
                background image's size.
            engine: an existing engine to use instead of creating one.
        """
        # Game engine: reuse the supplied one or build a fresh one.
        self.tc = TicTacToe(player1, player2) if engine is None else engine

        def load_image(path):
            # Images are kept on self so the references stay alive.
            return ImageTk.PhotoImage(Image.open(path))

        # Artwork for the GUI.
        self.bg_image = load_image('game/graphics/background.png')
        self.x_image = load_image('game/graphics/x.png')
        self.o_image = load_image('game/graphics/o.png')
        self.empty_image = load_image('game/graphics/empty.png')

        # Window may not shrink below the background image.
        master.minsize(width=self.bg_image.width(),
                       height=self.bg_image.height())
        super().__init__(master)

        self.pack()
        # Background picture.
        backdrop = Label(master, image=self.bg_image)
        backdrop.pack()
        self.create_widgets()
Beispiel #24
0
def simulateGame(player, opponent):
    """Play one game between ``player`` and ``opponent``; return the fitness delta.

    An ``IndexError`` from a move attempt is treated as an illegal move. For
    ``player`` it counts as a strike: once ``NUMBER_ILLEGAL_MOVES_ALLOWED``
    strikes are reached the game is abandoned with ``-NUMBER_TO_SAMPLE``;
    before that the round is simply retried. For ``opponent`` a random agent
    makes the move instead.

    Returns:
        ``game.score()`` for a finished game, or ``-NUMBER_TO_SAMPLE`` when
        the player was penalised.
    """
    game = TicTacToe()
    strikes = 0

    def opponent_moves():
        # The opponent is never penalised: a random agent substitutes.
        try:
            pickAndMakeMove(game, opponent)
        except IndexError:
            pickAndMakeMove(game, agents.RandomAgent())

    def player_moves():
        # True when the move was made, False when it was illegal.
        try:
            pickAndMakeMove(game, player)
        except IndexError:
            return False
        return True

    while not game.isFinished():
        if game.isOurTurn():
            if not player_moves():
                strikes += 1
                if strikes >= NUMBER_ILLEGAL_MOVES_ALLOWED:
                    return -NUMBER_TO_SAMPLE  # penalise player
                continue

            if game.isFinished():
                break

            opponent_moves()
        else:
            opponent_moves()

            if game.isFinished():
                break

            if not player_moves():
                strikes += 1
                if strikes >= NUMBER_ILLEGAL_MOVES_ALLOWED:
                    return -NUMBER_TO_SAMPLE  # penalise player

    # Reached only when the game has finished.
    return game.score()
class TestGame(unittest.TestCase):
    """Move bookkeeping and win detection tests for ``TicTacToe``."""

    def setUp(self):
        game = TicTacToe()
        game.board[4] = "X"  # occupy the centre before every test
        self.game = game

    def test_available_moves(self):
        """The occupied centre square must not be offered as a move."""
        free_squares = self.game.available_moves()
        self.assertTrue(4 not in free_squares)

    def test_make_move(self):
        """Moving onto square 4 is refused; moving onto square 5 succeeds."""
        refused = self.game.make_move(4, "X")
        self.assertFalse(refused)
        accepted = self.game.make_move(5, "X")
        self.assertTrue(accepted)
        self.assertTrue(self.game.board[5] != " ")

    def test_winner(self):
        """
        Check ``winner`` for every single-line win of both letters.

        Three local helpers build each possible winning board: exactly one
        full row, one full column or one full diagonal of the letter, with
        every other cell empty. ``winner`` is then queried from each square
        of that line and must report a win.
        :return:
        """
        def blank_board():
            return [" " for _ in range(9)]

        def check_full_rows(letter):
            for col in range(3):
                for row in range(3):
                    self.game.board = blank_board()
                    self.game.board[row * 3:(row + 1) * 3] = [letter] * 3
                    self.assertTrue(self.game.winner(col + row * 3, letter))

        def check_full_columns(letter):
            for col in range(3):
                for row in range(3):
                    self.game.board = blank_board()
                    for offset in (0, 3, 6):
                        self.game.board[col + offset] = letter
                    self.assertTrue(self.game.winner(row * 3 + col, letter))

        def check_diagonals(letter):
            for line in ([0, 4, 8], [2, 4, 6]):
                for square in line:
                    self.game.board = blank_board()
                    for cell in line:
                        self.game.board[cell] = letter
                    self.assertTrue(self.game.winner(square, letter))

        for mark in ("X", "O"):
            check_full_rows(mark)
            check_full_columns(mark)
            check_diagonals(mark)
Beispiel #26
0
def games_are_equivalent():
    """Cross-check ``TicTacToe`` against ``BitTicTacToe`` over 10000 random games.

    Both implementations are fed the same randomly chosen move each turn.
    After every move they must agree on the available moves (checked before
    the move), on whether the game is done, and on the winner.

    Raises:
        Exception: as soon as the two implementations disagree.
    """
    a = TicTacToe()
    b = BitTicTacToe()
    for _ in range(10000):
        finished = False
        while not finished:
            moves_a = a.available_moves()
            moves_b = b.available_moves()
            if sorted(moves_a) != sorted(moves_b):
                raise Exception()

            pick = moves_a[randint(0, len(moves_a) - 1)]
            a.move(pick)
            b.move(pick)

            if a.is_done() != b.is_done():
                raise Exception()

            same_winner = (
                (a.winner == TicTacToe.X and b.winner == BitTicTacToe.X)
                or (a.winner == TicTacToe.O and b.winner == BitTicTacToe.O)
                or (a.winner == TicTacToe.Empty
                    and b.winner == BitTicTacToe.Empty))
            if not same_winner:
                raise Exception()

            finished = a.is_done()
        a.clear()
        b.clear()
Beispiel #27
0
def human_against_agent(player_num, filename):
  """Let a human play the agent repeatedly until they decline another game.

  Args:
    player_num: 1 puts the human in seat 1; any other value puts the agent
      in seat 1 and the human in seat 2.
    filename: passed to ``Agent`` as its last argument.

  After each game played with ``player_num == 2`` the agent's policy is
  dumped to ``p1_policy.csv``.
  """
  human_goes_first = player_num == 1
  while True:
    # Fresh player objects for every game.
    if human_goes_first:
      player_1 = Human(1)
      player_2 = Agent(2, -1, 0.0, .1, 0.9, True, filename)
    else:
      player_1 = Agent(1, -1, 0.0, .1, 0.9, True, filename)
      player_2 = Human(2)

    _winner, _board_state_hash = TicTacToe.play(player_1, player_2)
    if player_num == 2:
      # Seat 1 is the agent here, so its policy is the one to keep.
      print("Thanks for the training data silly human!")
      player_1.dump_policy_to_csv('p1_policy.csv')

    answer = input('Play Again? (y or n)')
    if answer != 'y':
      return
 def start_session(self) -> None:
     """
     Run the menu loop until the user picks the exit entry.

     Menu choice 4 leaves the loop, 3 loads the history, 2 starts a game
     against a bot, and any other choice starts a game between two humans.
     The start and end of the session are logged.
     :return: None
     """
     logging.info('Session started...')
     choice = self.menu.show_menu()
     while choice != 4:
         if choice == 3:
             self.load_history()
         elif choice == 2:
             side1, side2 = TicTacToe.sides()
             player, bot = self.init_human_bot(side1, side2)
             self.start_game(player, bot)
         else:
             player1, player2 = self.init_humans()
             self.start_game(player1, player2)
         choice = self.menu.show_menu()
     logging.info('Exiting from the game...\n\n')
def setup_game():
    """Ask for the game mode, names and letters, then start the game.

    Prompts in order: computer opponent (repeated until Y or N), player 1's
    name, player 2's name (two-human games only), and player 1's letter
    (repeated until X or O). Player 2 gets the other letter.
    """
    # Versus human or versus computer.
    answer = ''
    while answer not in ('Y', 'N'):
        answer = input(
            '\nDo you want to play against the computer? (Y/N) ').upper()
    computer = answer == 'Y'

    # Names; a computer opponent is not asked for one.
    name1 = input('Player 1, what shall we call you? ')
    name2 = '' if computer else input('Player 2, What shall we call you? ')

    # Letters.
    letter1 = ''
    while letter1 not in ('X', 'O'):
        letter1 = input(
            f'{name1}, which letter do you want to be, X or O?: ').upper()
    letter2 = 'X' if letter1 == 'O' else 'O'

    # Build the players and hand over to the game loop.
    player1 = Human(letter1, name1)
    if computer:
        player2 = Computer(letter2)
    else:
        player2 = Human(letter2, name2)
    play_game(player1, player2, TicTacToe())
Beispiel #30
0
from nn import NeuralNetwork
from helpers import get_best_legal_move

from tkinter import *
from functools import partial
from game import TicTacToe
from tkinter import filedialog

# Module-level GUI state shared by the callbacks defined below.

# Root window; created before the PhotoImage objects further down.
master = Tk()
# Widget lists start empty; who_is_first_buttons is emptied again in
# who_is_first(). presumably both are filled by the button-building
# functions -- TODO confirm
buttons = []
who_is_first_buttons = []
replay_btn = None

# Game model driving the GUI.
game = TicTacToe()
# No network yet. presumably assigned once a NeuralNetwork is loaded -- TODO confirm
nn = None
# Cell artwork, loaded from files in the current working directory.
X = PhotoImage(file='X.gif')
O = PhotoImage(file='O.gif')
Empty = PhotoImage(file='Empty.png')


def who_is_first(val):
    """Hide the who-moves-first buttons, build the board, maybe let the bot open.

    Args:
        val: when truthy, ``bot_move()`` is called once the board buttons
            have been initialised.
    """
    for chooser in who_is_first_buttons:
        chooser.grid_remove()
    # Empty the shared list in place so other references see it emptied too.
    del who_is_first_buttons[:]
    init_buttons()
    if not val:
        return
    bot_move()


def ask_who_is_first():
    you_first = Button(master,
Beispiel #31
0
class MCTSPlayer(Player):
    """Monte-Carlo tree search player.

    Tree nodes are named by strings: ``rootnode`` (">") followed by one digit
    per move, and each node carries ``[total_score, visit_count]`` as data.
    The move a node stands for is the last character of its name.
    """
    rootnode = ">"

    def __init__(self, ep=np.sqrt(2), numplayouts=20, movetime=45):
        """
        Args:
            ep: weight of the exploration term in ``calcUCT``.
            numplayouts: number of random games averaged per playout.
            movetime: search budget per move, in seconds.
        """
        self.gt = GameTree()
        self.ep = ep
        self.numpl = numplayouts
        self.movet = movetime
        # One random player fills both seats of the internal playout game.
        self.p1 = SophisticatedRandomPlayer()
        self.game = TicTacToe(self.p1, self.p1, verbose=False)

    def startGame(self):
        """Discard the previous search tree and start a new one at the root."""
        print("MCTSPlayer starts game")
        self.gt = GameTree()
        # define root node (i.e. current game state)
        self.gt.add_node(node=MCTSPlayer.rootnode, data=[0, 0], parent=None)

    def move(self, board):
        """Search from ``board`` until the time budget is spent; return a move.

        Args:
            board: current position; ``board.state[18]`` is read as the
                index of the player to move.

        Returns:
            The move (int) of the most visited child of the current node, or
            of the first child if no child has been visited yet.

        Raises:
            RuntimeError: if the current node has no children to choose from.
        """
        print("MCTSPlayer thinks...")
        nodecnt = 0
        t0 = time.time()
        rtnode, alrdy_in = self.board_already_in_gt(board)
        if not alrdy_in:
            # The position is new to the tree: find its deepest ancestor that
            # is already stored and add the missing chain below it.
            rtnode_would_be_parents = self.gt.get_would_be_parents_to_root(
                rtnode)
            last_parent_in_gt = max(
                (p for p in rtnode_would_be_parents
                 if self.gt.node_in_tree(p)),
                key=len)

            last_added_parent = last_parent_in_gt
            for m in rtnode.replace(last_parent_in_gt, ""):
                self.gt.add_node(m, [0, 0], last_added_parent)
                last_added_parent = last_added_parent + m

        # which player are we?
        player = board.state[18]

        while True:
            nodecnt += 1
            chosennode = self.choose_next_node(rtnode, board)
            if time.time() - t0 > self.movet:
                break
            # Rebuild the position at the chosen node from the current board.
            nodeboard = Board(board.state.copy())
            for m in chosennode.replace(rtnode, ""):
                nodeboard.pushMove(int(m))

            # Add children of current node
            for m in nodeboard.legalMoves():
                self.gt.add_node(str(m), data=[0, 0], parent=chosennode)

            score = self.playout(nodeboard, player)

            # back propagate score & numVisits
            for node in self.gt.get_parents_to_root(chosennode):
                data = self.gt.get_data(node)
                data[0] += score
                data[1] += 1
                self.gt.update_data(node, data)

        children = self.gt.get_children(rtnode)
        best_move = None
        best_numVisits = 0
        for m in children:
            data = self.gt.get_data(m)
            if data[1] > best_numVisits:
                best_numVisits = data[1]
                best_move = m
        if best_move is None:
            # No child was visited within the budget (e.g. a very short
            # movetime). Take the first child instead of failing on None[-1].
            if len(children) == 0:
                raise RuntimeError(
                    "MCTSPlayer has no candidate moves for this position")
            best_move = children[0]
        print(
            "MCTSPLayer explored {0} nodes in {1} seconds at {2:.2f} nodes/s".
            format(nodecnt, self.movet, nodecnt / self.movet))
        return int(best_move[-1])

    def board_already_in_gt(self, board):
        """Find the tree node that corresponds to ``board``.

        Returns:
            ``(node, True)`` with the most visited stored node whose replayed
            moves reproduce ``board.state``, or ``(node, False)`` with a name
            constructed for the position when no stored node matches.
        """
        num_moves = np.sum(board.state[:18])
        possibilities = filter(lambda x: len(x) == num_moves + 1,
                               self.gt.get_all_nodes())
        equivalents = []
        for p in possibilities:
            b = Board()
            for m in p[1:]:
                b.pushMove(int(m))
            if np.array_equal(b.state, board.state):
                equivalents.append(p)

        if len(equivalents) == 0:
            # Build a name by interleaving the occupied cells found in
            # state[:9] and state[9:18], in ascending cell order.
            # NOTE(review): this is not necessarily the order the moves were
            # actually played in -- confirm that is acceptable.
            bdarray = board.state.copy()[:-1]
            where1 = list(np.where(bdarray[:9] == 1)[0])
            where2 = list(np.where(bdarray[9:] == 1)[0])
            node = []
            for first, second in zip(where1, where2):
                node.append(str(first))
                node.append(str(second))
            if len(where1) > len(where2):
                node.append(str(where1[-1]))
            node = "".join(node)
            node = MCTSPlayer.rootnode + node.strip()
            return node, False
        else:
            return max(equivalents, key=lambda x: self.gt.get_data(x)[1]), True

    def playout(self, board, player):
        """Score ``board`` from ``player``'s point of view.

        A finished game scores 0 (no winner), 1 (``player`` won) or -1.
        Otherwise ``self.numpl`` games are played out from the position with
        the internal random players and their results averaged.
        """
        gameover, winner = board.isGameOver()
        if gameover:
            if winner is None:
                return 0
            elif winner == player:
                return 1
            else:
                return -1
        scr = 0
        for _ in range(self.numpl):
            self.game.board = Board(board.state.copy())
            # (1 - 2 * player) flips the sign of the result for player 1.
            scr += self.game.play() * (1 - 2 * player)
        return scr / self.numpl

    def choose_next_level_node(self, currnode, board):
        """Pick the child of ``currnode`` to descend into.

        The first unvisited child (UCT of None) wins outright; otherwise the
        child with the highest UCT value is taken.

        Returns:
            ``(node, terminal)`` where ``terminal`` says whether the descent
            should stop at ``node``. A node without children returns itself
            with ``terminal`` True.
        """
        children = self.gt.get_children(currnode)
        if len(children) == 0:
            return currnode, True

        best_uct = -1e6
        best_child = None
        for child in children:
            uct = self.calcUCT(currnode, child)
            if uct is None:
                return child, self.is_node_terminal(child, board)
            if uct > best_uct:
                best_uct = uct
                best_child = child

        if best_child is None:
            # No child had a comparable UCT value; keep the historical
            # placeholder name and stop the descent here.
            return "-------------", True
        # The flag must describe the child being returned. It used to be the
        # flag of whichever child happened to be inspected last.
        return best_child, self.is_node_terminal(best_child, board)

    def choose_next_node(self, currnode, board):
        """Descend from ``currnode`` until a terminal node is reached.

        The descent is capped at nine levels, the length of the longest
        possible game.
        """
        parent = currnode
        for _ in range(9):
            node, terminal = self.choose_next_level_node(parent, board)
            if terminal:
                return node
            parent = node
        # Cap reached without a terminal flag: return the deepest node found
        # rather than None.
        return parent

    def is_node_terminal(self, node, rootboard):
        """Tell whether the descent should stop at ``node``.

        True when the game is over at that node or the node has never been
        visited.
        """
        # NOTE(review): every move in the node name is replayed on a copy of
        # rootboard, while move() replays only the part after its root node.
        # Confirm this is intended when rootboard already contains moves.
        bd = Board(rootboard.state.copy())
        for m in node[1:]:
            bd.pushMove(int(m))
        gameover, _ = bd.isGameOver()
        if gameover:
            return True
        return self.gt.get_data(node)[1] == 0

    def calcUCT(self, parentnode, childnode):
        """Return the UCT value of ``childnode``, or None if it is unvisited.

        The value is the child's mean score plus
        ``ep * sqrt(ln(parent visits) / child visits)``.
        """
        child_data = self.gt.get_data(childnode)
        child_visits = child_data[1]
        if child_visits == 0:
            return None
        parent_visits = self.gt.get_data(parentnode)[1]
        mean_score = child_data[0] / child_visits
        exploration = self.ep * np.sqrt(np.log(parent_visits) / child_visits)
        return mean_score + exploration