Esempio n. 1
0
def _read_coordinate(prompt):
    '''
    Prompt on stdin until the user types a valid integer, then return it.
    '''
    while True:
        try:
            return int(input(prompt))
        except ValueError:
            # A typo should not abort the whole game; just ask again.
            print('Please enter an integer.')


def play_user(model):
    '''
    Test the model against human skill level

    Plays one interactive game on a ``TicTacToe(3)`` instance. Player 1 is
    the human, who types the ``x`` and ``y`` of each move on stdin; player 2
    is the model, whose move is chosen by ``evaluate`` with ``tau=.1``.
    The starting player is drawn at random.

    Before every move the board returned by ``game.get_board(pid)`` is
    printed. On the model's turns the probabilities returned by ``evaluate``
    and the result of ``model.evaluate`` on the current input are printed
    as well. The loop ends as soon as ``game.check_win()`` returns something
    other than ``None``; the final ``game.get_input(1)`` is then printed.

    model: object passed to ``evaluate`` and exposing ``evaluate(inputs)``.
    '''
    game = TicTacToe(3)

    # np.random.random_integers is deprecated; randint's upper bound is
    # exclusive, so (1, 3) draws either 1 or 2.
    pid = np.random.randint(1, 3)
    winner = None

    while winner is None:

        board = game.get_board(pid)
        print(board)

        if pid == 2:
            # Model's turn: show the move distribution and value estimate.
            x, y, prob = evaluate(model, game, pid, tau=.1)
            print(prob)
            print(model.evaluate(game.get_input(pid)))
        else:
            # Human's turn: re-prompt on non-numeric input instead of crashing.
            x = _read_coordinate('x: ')
            y = _read_coordinate('y: ')

        game.place(pid, x, y)
        winner = game.check_win()

        # Alternate between player 1 and player 2.
        pid = (pid % 2) + 1

    print(game.get_input(1))
Esempio n. 2
0
def debug_run(model):
    '''
    Shows the board and value for each step in a game

    Runs one self-play game on a ``TicTacToe(3)`` instance, with every move
    chosen by ``q_select``. Before each move the current player's board and
    the result of ``model.evaluate`` on the current input are printed. Once
    ``game.check_win()`` returns something other than ``None`` the loop ends
    and the first row of ``model.probs`` for the final position is printed.

    model: object exposing ``evaluate(inputs)``, a ``sess`` with ``run``,
        and the ``probs`` / ``states`` attributes used to query the policy.
    '''
    game = TicTacToe(3)

    # np.random.random_integers is deprecated; randint's upper bound is
    # exclusive, so (1, 3) draws either 1 or 2.
    pid = np.random.randint(1, 3)
    winner = None
    while winner is None:

        board = game.get_board(pid)

        val = model.evaluate(game.get_input(pid))
        print(board)
        print(val)

        x, y = q_select(pid, board, model, game)

        game.place(pid, x, y)
        winner = game.check_win()

        # Alternate between player 1 and player 2.
        pid = (pid % 2) + 1

    # NOTE: pid has already been switched, so this is the policy output for
    # the player who would have moved next.
    final_probs = model.sess.run(
        model.probs, feed_dict={model.states: game.get_input(pid)})
    print(final_probs[0])
Esempio n. 3
0
            print('{}: {}/{}'.format(it, i, EPOCH), end='\r')
            game = TicTacToe(3)
            
            pid = np.random.random_integers(low=1, high=2, size=1)[0]
            winner = None

            inputs = {1: [], 2: []}
            probs = {1 : [], 2: []}
            while winner is None:

                board = game.get_board(pid)

                r_board = game.get_board_raw()
                x, y, prob = evaluate(model, game, (pid % 2) + 1)

                inputs[pid].append(copy.copy(game.get_input(pid)))
                probs[pid].append(prob)

                game.place(pid, x, y)
                winner = game.check_win()

                pid = (pid % 2) + 1
            
            if winner != 0:
                one_reward = [1 if winner == 1 else -1] * len(inputs[1])
                two_reward = [1 if winner == 2 else -1] * len(inputs[2])
            else:
                one_reward = [0] * len(inputs[1])
                two_reward = [0] * len(inputs[2])

            total_inputs = build_input(inputs)