Example #1
0
def test_against_random(model, size=100):
    '''
    Evaluate the model against an opponent that moves via random_choice.

    Plays `size` games on a fresh `TicTacToe(3)` each time. Player 1 picks
    its moves with `random_choice`, player 2 picks its moves with
    `evaluate(model, ...)`, and the starting player is drawn at random for
    every game. A one-line summary is printed from the model's point of
    view (player 2 winning counts as a "Win").

    Args:
        model: Model handed to `evaluate` to choose player 2's moves.
        size: Number of games to play.

    Returns:
        Fraction of the played games that were won by the model
        (player 2), or 0.0 when no game was played (`size <= 0`).
    '''

    # Keyed by the value returned from game.check_win():
    # 0 = tie, 1 = random player won, 2 = model won.
    wins = {0: 0, 1: 0, 2: 0}

    for _ in range(size):
        game = TicTacToe(3)

        # Draw the starting player from {1, 2}. randint's upper bound is
        # exclusive; it replaces the deprecated np.random.random_integers.
        pid = np.random.randint(1, 3)
        winner = None
        while winner is None:

            if pid == 1:
                # Only the random player needs the board view.
                board = game.get_board(pid)
                x, y = random_choice(board)
            else:
                x, y, _ = evaluate(model, game, pid, tau=1)

            game.place(pid, x, y)

            winner = game.check_win()

            # Alternate turns: 1 -> 2 -> 1 -> ...
            pid = (pid % 2) + 1

        wins[winner] += 1

    print('Wins: %d Ties: %d Losses: %d' % (wins[2], wins[0], wins[1]))

    total = wins[0] + wins[1] + wins[2]
    if total == 0:
        # No games were played; avoid dividing by zero.
        return 0.0
    return wins[2] / total
Example #2
0
    for it in range(ITER):
        for i in range(EPOCH):
            print('{}: {}/{}'.format(it, i, EPOCH), end='\r')
            game = TicTacToe(3)
            
            pid = np.random.random_integers(low=1, high=2, size=1)[0]
            winner = None

            inputs = {1: [], 2: []}
            probs = {1 : [], 2: []}
            while winner is None:

                board = game.get_board(pid)

                r_board = game.get_board_raw()
                x, y, prob = evaluate(model, game, (pid % 2) + 1)

                inputs[pid].append(copy.copy(game.get_input(pid)))
                probs[pid].append(prob)

                game.place(pid, x, y)
                winner = game.check_win()

                pid = (pid % 2) + 1
            
            if winner != 0:
                one_reward = [1 if winner == 1 else -1] * len(inputs[1])
                two_reward = [1 if winner == 2 else -1] * len(inputs[2])
            else:
                one_reward = [0] * len(inputs[1])