def test_evaluate_graphic():
    """
    Interactively show random boards with the cells flagged by `evaluate`.

    Until the user types ``q`` at the prompt, a random 19x19 grid of values
    0/1/2 is generated and one-hot encoded into a (1, 19, 19, 3) board
    (value 1 -> channel 0, value 2 -> channel 1). `evaluate` is then called
    for every cell, first for player 0 and then for player 1, and the grid
    is printed with every cell for which `evaluate` returned a truthy value
    highlighted in green using ANSI escape codes.
    """
    while input('press q to quit: ') != 'q':
        tmp = np.random.randint(3, size=(19, 19))
        board = np.zeros((1, 19, 19, 3), dtype=int)
        # Boolean masks are cast to 0/1 on assignment into the int board.
        board[0, :, :, 0] = tmp == 1
        board[0, :, :, 1] = tmp == 2

        # A set of (y, x) tuples gives O(1) membership checks while printing,
        # instead of scanning a list once per cell.
        wpos = set()
        for player in (0, 1):
            for y in range(19):
                for x in range(19):
                    if evaluate(board, player, [y, x]):
                        wpos.add((y, x))

        for y in range(19):
            for x in range(19):
                # Recover the 0/1/2 cell value from the two stone channels.
                v = board[0, y, x, 0] + board[0, y, x, 1] * 2
                if (y, x) in wpos:
                    print('\033[92m{}\033[0m'.format(v), end='')
                else:
                    print(v, end='')
            print()
Example #2
0
def test_against_random(model, size=100):
    '''
    Evaluate the model against a random opponent.

    Plays `size` games on a ``TicTacToe(3)`` board. Player 1 picks its moves
    with `random_choice`; player 2 picks its moves with
    ``evaluate(model, game, pid, tau=1)``. The player who moves first is
    drawn at random for every game.

    Prints the tally from player 2's point of view (wins, ties, losses) and
    returns the fraction of games won by player 2. Returns 0.0 when no game
    was played (``size <= 0``) instead of dividing by zero.
    '''
    # Keyed by the value returned from game.check_win():
    # 0 is counted as a tie, 1 as a loss and 2 as a win for the model.
    wins = {0: 0, 1: 0, 2: 0}

    for _ in range(size):
        game = TicTacToe(3)

        # randint's upper bound is exclusive, so this draws 1 or 2.
        # It replaces the deprecated np.random.random_integers(1, 2).
        pid = np.random.randint(1, 3)
        winner = None
        while winner is None:
            if pid == 1:
                x, y = random_choice(game.get_board(pid))
            else:
                x, y, _ = evaluate(model, game, pid, tau=1)

            game.place(pid, x, y)
            winner = game.check_win()

            # Alternate between player 1 and player 2.
            pid = (pid % 2) + 1

        wins[winner] += 1

    print('Wins: %d Ties: %d Losses: %d' % (wins[2], wins[0], wins[1]))

    games_played = wins[0] + wins[1] + wins[2]
    if not games_played:
        return 0.0
    return wins[2] / games_played
Example #3
0
def play_user(model):
    '''
    Play one game of the model against a human at the console.

    A ``TicTacToe(3)`` game is created and the starting player is drawn at
    random. Player 2 is the model: its move comes from
    ``evaluate(model, game, pid, tau=.1)``, and the returned probabilities
    plus ``model.evaluate`` of the current input are printed. Player 1 is
    the human, who is prompted for integer ``x`` and ``y`` coordinates and
    re-prompted when the input is not an integer.

    The board is printed before every move, and ``game.get_input(1)`` is
    printed once the game is over.
    '''
    game = TicTacToe(3)

    # randint's upper bound is exclusive, so this draws 1 or 2.
    # It replaces the deprecated np.random.random_integers(1, 2).
    pid = np.random.randint(1, 3)
    winner = None

    while winner is None:
        board = game.get_board(pid)
        print(board)

        if pid == 2:
            x, y, prob = evaluate(model, game, pid, tau=.1)
            print(prob)
            print(model.evaluate(game.get_input(pid)))
        else:
            # Ask again rather than crash on non-numeric input.
            while True:
                try:
                    x = int(input('x: '))
                    y = int(input('y: '))
                except ValueError:
                    print('Please enter integer coordinates.')
                else:
                    break

        game.place(pid, x, y)
        winner = game.check_win()

        # Alternate between player 1 and player 2.
        pid = (pid % 2) + 1

    print(game.get_input(1))
def basic_win():
    """
    Run `evaluate` on a small hand-built 19x19 position.

    The grid is empty except for four 1-valued cells in row 0 (columns 1-4)
    and four 2-valued cells in row 1 (columns 0-3). It is converted with
    `conv_map`, and the result of ``evaluate(board, 0, (0, 0))`` is
    returned unchanged.
    """
    e = np.zeros((19, 19), dtype=int)
    e[0, 1:5] = 1
    e[1, 0:4] = 2

    board = conv_map(e)
    player = 0
    pos = (0, 0)

    return evaluate(board, player, pos)
def test_winning_move_when_one():
    """
    Test whether the network finds a winning move when one exists.

    Random 19x19 boards are generated until one offers a winning move. While
    a board is filled in, any stone for which `evaluate` returns a truthy
    value is taken off again, and the first player this happens to is
    remembered (1 or 2). That player then gets one `expand` + `turn` with a
    fresh `Network(0)` / `Node(0)`.

    Returns True if, after that turn, `evaluate` is truthy for the player at
    any cell of the board; returns False otherwise.
    """
    while True:
        # 0 = no winning move seen yet, otherwise the player (1 or 2)
        # who first had a winning stone removed.
        has_winning_move = 0
        tmp = np.random.randint(3, size=(19, 19))
        board = np.zeros((1, 19, 19, 3), dtype=int)

        for y in range(19):
            for x in range(19):
                stone = int(tmp[y, x])
                if stone == 0:
                    continue
                # Stone value 1 goes to channel/player 0, value 2 to 1.
                channel = stone - 1
                board[0, y, x, channel] = 1
                if evaluate(board, channel, [y, x]):
                    # Take the stone off so the board stays undecided.
                    board[0, y, x, channel] = 0
                    # Record only the first player found. The previous
                    # `if has_winning_move:` could never fire because the
                    # flag starts at 0, so the outer loop never ended.
                    if not has_winning_move:
                        has_winning_move = stone
        if not has_winning_move:
            continue

        network = Network(0)
        node = Node(0)
        expand(node, board, has_winning_move - 1, network)

        turn(board, has_winning_move - 1, node, network)

        for y in range(19):
            for x in range(19):
                if evaluate(board, has_winning_move - 1, [y, x]):
                    return True

        return False
Example #6
0
def human_turn(board, node, player, net):
    """
    Ask the human for a move, play it and report the outcome.

    Prompts for integer ``x`` and ``y`` until both parse. After a malformed
    entry the user may quit, which raises ``SystemExit(0)``. The move is
    placed with `put_on_board`, the search tree is advanced with
    `update_turn` for the other player (``player ^ 1``), and the position is
    scored with `evaluate`.

    Returns a ``(node, r)`` tuple: the node returned by `update_turn` and
    the second element of the pair returned by `evaluate`.
    """
    print("Your turn, Human")

    while True:
        try:
            x = int(input("x: "))
            y = int(input("y: "))
        except ValueError:
            # Only a parse failure is handled here; Ctrl-C and EOF propagate
            # instead of being reported as a format error.
            print("wrong format, only integers required")
            if input("quit? (y/n): ") == "y":
                # Same effect as exit(0), without relying on the site module.
                raise SystemExit(0)
        else:
            break

    # NOTE(review): other callers in this file pass positions as [y, x];
    # confirm that (x, y) is the order the helpers below expect.
    pos = (x, y)

    put_on_board(board, pos, player, 1)

    node = update_turn(board, player ^ 1, node, net, pos)
    _, r = evaluate(board, player, pos)

    return node, r
Example #7
0
    for it in range(ITER):
        for i in range(EPOCH):
            print('{}: {}/{}'.format(it, i, EPOCH), end='\r')
            game = TicTacToe(3)
            
            pid = np.random.random_integers(low=1, high=2, size=1)[0]
            winner = None

            inputs = {1: [], 2: []}
            probs = {1 : [], 2: []}
            while winner is None:

                board = game.get_board(pid)

                r_board = game.get_board_raw()
                x, y, prob = evaluate(model, game, (pid % 2) + 1)

                inputs[pid].append(copy.copy(game.get_input(pid)))
                probs[pid].append(prob)

                game.place(pid, x, y)
                winner = game.check_win()

                pid = (pid % 2) + 1
            
            if winner != 0:
                one_reward = [1 if winner == 1 else -1] * len(inputs[1])
                two_reward = [1 if winner == 2 else -1] * len(inputs[2])
            else:
                one_reward = [0] * len(inputs[1])
                two_reward = [0] * len(inputs[2])