Exemple #1
0
    player_symbol = input('Invalid symbol. Pick X or O: ')

# Turn slot of the human player: slot 0 plays 'X' and moves first,
# any other symbol choice puts the human in slot 1.
player_num = 0 if player_symbol == 'X' else 1

symbols = ['X', 'O']
counter = 0  # slot of the player whose turn it is
winner = ''  # stays empty until setSquare reports a result
while winner == '':
    board.printBoard()
    if counter != player_num:
        # Computer's turn: let the AI pick and announce its move.
        index = ai_pick(board, counter)
        print(symbols[counter] + ' chooses index ' + str(index))
        print()
    else:
        # Human's turn: keep prompting until the entry is an integer
        # in 0..8 that refers to a square whose value is 0.
        index = input('Choose an index for a square: ')
        while True:
            if (is_int(index) and 0 <= int(index) <= 8
                    and board.getSquare(int(index)) == 0):
                break
            index = input('Your entry was invalid. Choose again: ')
        index = int(index)
    # setSquare's return value drives the loop: '' means keep playing.
    winner = board.setSquare(index, symbols[counter])
    # Hand the turn to the other slot (0 <-> 1).
    counter = 1 - counter

# Show the final position and the outcome ('D' marks a draw).
board.printBoard()
print('Draw!' if winner == 'D' else winner + ' won!')
Exemple #2
0
        # Play one game on a fresh Board, recording one sample per move.
        board = Board()
        winner = ''  # stays '' until setSquare returns a result or an invalid move sets 'ERR'
        counter = 0  # index into symbols of the player to move; alternates 0/1
        symbols = ['X', 'O']
        # Samples are buffered for the whole game because their final values
        # are not known until the game has ended.
        samples = []  # each sample contains state, action, reward, and next state
        while winner == '':
            state = state_from_board(board, counter)
            
            # NOTE(review): presumably an epsilon-greedy choice made by the model;
            # confirm against choose_action.
            action = choose_action(epsilon, state, model, sess)

            current_sample = []
            current_sample.append(state)
            current_sample.append(action)
            
            # A square value of 0 is treated as free; only then is the move applied.
            if board.getSquare(action) == 0:
                winner = board.setSquare(action, symbols[counter])
                current_sample.append(1)  # placeholder reward; changed once the winner is known
            else:
                # Choosing an occupied square ends the game immediately.
                winner = 'ERR'
                current_sample.append(-2)  # an invalid move gets a bad reward

            samples.append(current_sample)
            # Switch to the next player.
            counter = (counter + 1)%2

        # This loop adds the next state to each sample. Players alternate, so
        # the sample two positions later was recorded for the same player; its
        # state is used as this sample's next state.
        num_samples = len(samples)
        for i in range(num_samples):
            # Only samples that have a sample two positions later are handled here.
            if i < num_samples - 2:
                samples[i].append(samples[i + 2][0])