def play_to_train(self, Vo, Vx, n_iterations=10000, initial_player=Board.x_symbol):
        '''
        Train both players by repeated self-play: each game is played with
        uniformly random moves and the visited states receive the terminal
        reward through update_value_function.
        :param Vo: the value functions of player 'o' (a dictionary).
                    Example: Vo[1000010000] = 0.35,
                    where 1000010000 is the id of the state and
                    0.35 is the value of that state.
        :param Vx: the value functions of player 'x' (a dictionary)
        :param n_iterations: the number of games to play
        :param initial_player: the symbol ('x' or 'o') of the player who opens the first game
        :return:
        '''
        assert (initial_player == Board.x_symbol or initial_player == Board.o_symbol)
        assert (len(Vx) == len(Vo))
        assert (n_iterations > 0)

        current_turn = initial_player

        for iteration in range(n_iterations):
            if iteration % 1000 == 0:
                print(f'iteration = {iteration}')

            b = Board()
            board = b.board
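            # b.board is assumed to be a 2-D numpy array of cell ids,
            # where 0 (Board.empty_id) marks an empty cell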

            current_Vx = []
            current_Vo = []
            current_id_states = []

            draw = False
            game_over = False

            while not draw and not game_over:
                # print('++++++++++++++++++++++++++++++')

                # retrieve all empty cells
                empty_cells = []
                for i in range(b.height):
                    for j in range(b.width):
                        if board[i, j] == 0:
                            empty_cells.append([i, j])
                # print(f'Empty cells = {empty_cells}')

                # just choose one empty cell to play
                if len(empty_cells) > 0:
                    played_cell = np.random.choice(len(empty_cells))
                    row, col = empty_cells[played_cell]
                    board[row, col] = b.convert_turn_symbol2id(current_turn)
                    # print(f'Play {current_turn} on {empty_cells[played_cell]}')

                    # update states
                    state = b.get_state()
                    current_id_states.append(state)

                    # each visited state starts with the neutral reward; the last
                    # state's entry is overwritten below once a winner is known
                    current_Vx.append(self.AVERAGE_REWARD)
                    current_Vo.append(self.AVERAGE_REWARD)

                    game_over = b.is_game_over()
                    # print(f'Over: {game_over}')
                    # print(f'Winner: {b.winner}')
                    # print(f'State id: {b.get_state()}')

                    # update turn
                    if current_turn == Board.x_symbol:
                        current_turn = Board.o_symbol
                    else:
                        current_turn = Board.x_symbol
                else:
                    # no empty cell left and nobody has won: the game is a draw
                    draw = True

                # b.draw()

            # update the value function of states
            if game_over:
                winner = b.get_winner()

                if winner == Board.x_symbol:
                    current_Vx[-1] = self.HIGHEST_REWARD
                    current_Vo[-1] = self.LOWEST_REWARD
                elif winner == Board.o_symbol:
                    current_Vx[-1] = self.LOWEST_REWARD
                    current_Vo[-1] = self.HIGHEST_REWARD

                # fold this game's per-state rewards into the global value tables;
                # update_value_function returns the updated dictionary, hence the reassignment
                Vx = self.update_value_function(Vx, current_id_states, current_Vx)
                Vo = self.update_value_function(Vo, current_id_states, current_Vo)

            else:
                # a drawn game leaves the value functions unchanged
                # print('Draw')
                pass

            # toggle current_turn so the player who opens the next game
            # varies from one game to the next
            if current_turn == Board.x_symbol:
                current_turn = Board.o_symbol
            else:
                current_turn = Board.x_symbol


def play_with_machine(V_machine, human=Board.x_symbol, machine=Board.o_symbol):
    '''
    Play an interactive game: the human enters moves, the machine greedily picks
    the move whose resulting state has the highest value in V_machine.
    :param V_machine: value functions of the machine (a dictionary, see play_to_train)
    :param human: 'x' or 'o'
    :param machine: 'o' or 'x'
    :return:
    '''
    current_turn = human

    b = Board()
    board = b.board

    states = []
    states_reward = []
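    # trajectory bookkeeping: these lists are filled below but not used further in this function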

    # stop when somebody wins or the board is full (a draw)
    while not b.is_game_over() and np.any(board == Board.empty_id):

        if current_turn == human:
            print(f'>>> Human turn ({human})')
        else:
            print(f'>>> Machine turn ({machine})')

        if current_turn == human:
            # ask human for move
            available_move = False

            while not available_move:
                print('Choose the cell you want to play as "row col" (e.g., 1 2): ')
                inp = input().split(' ')

                # reject anything that is not two integers inside the board
                try:
                    row = int(inp[0])
                    col = int(inp[1])
                except (ValueError, IndexError):
                    print('Wrong move!')
                    continue

                if row >= b.height or col >= b.width or row < 0 or col < 0:
                    available_move = False
                    print('Wrong move!')
                elif board[row, col] == Board.empty_id:
                    board[row, col] = b.convert_turn_symbol2id(current_turn)
                    available_move = True
                else:
                    available_move = False
                    print('Wrong move!')

        else:
            # find potential moves
            potential_states = []
            potential_moves = []
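            # simulate each legal move: place the machine's id, record the
            # resulting state id, then undo the move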

            for i in range(b.height):
                for j in range(b.width):

                    if board[i, j] == b.empty_id:
                        board[i, j] = b.convert_turn_symbol2id(current_turn)

                        potential_states.append(b.get_state())
                        potential_moves.append([i, j])

                        board[i, j] = b.empty_id

            # find the best move: the one leading to the state with the highest value
            if len(potential_moves) > 0:
                # default to the first legal move so best_move is never empty, and
                # start from -inf so even low-valued states can be selected
                best_move = potential_moves[0]
                best_move_value = -np.inf
                # print(f'potential_moves = {potential_moves}')

                for move, state in zip(potential_moves, potential_states):
                    # states never visited during training fall back to the neutral reward
                    value = V_machine.get(state, Training.AVERAGE_REWARD)
                    print(f'potential move {move} = {np.round(value, 2)}')

                    if value > best_move_value:
                        best_move_value = value
                        best_move = move

                # play the best move
                print(f'best move: {best_move}')
                board[best_move[0], best_move[1]] = b.convert_turn_symbol2id(current_turn)

        # store the state of the board
        states.append(b.get_state())
        states_reward.append(Training.AVERAGE_REWARD)

        # change turn
        if current_turn == human:
            current_turn = machine
        else:
            current_turn = human

        b.draw_board()

    print(f'Winner = {b.winner_symbol}')
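

# Example usage (a sketch only): it assumes a Training class that can be
# constructed without arguments and exposes play_to_train and the reward
# constants, and that the value dictionaries are pre-filled with one entry per
# reachable state id (enumerate_state_ids() is a hypothetical helper).
#
#   trainer = Training()
#   Vx = {s: Training.AVERAGE_REWARD for s in enumerate_state_ids()}
#   Vo = dict(Vx)
#   trainer.play_to_train(Vo, Vx, n_iterations=10000)
#   play_with_machine(Vo, human=Board.x_symbol, machine=Board.o_symbol)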