# Self-play episode: both sides pick moves via choose_action until
# board.setSquare reports a non-empty winner string.
winner = ''
counter = 0  # index into `symbols` of the side to move
symbols = ['X', 'O']

# Samples are buffered for the whole game because their real reward is
# only known once the game has finished.
# Each buffered entry is [state, action, reward]; the next state is not
# appended in this part of the code.
samples = []

while winner == '':
    state = state_from_board(board, counter)
    action = choose_action(epsilon, state, model, sess)
    winner = board.setSquare(action, symbols[counter])

    # 0.5 is only a placeholder reward; it is meant to be overwritten once
    # the winner is known.
    current_sample = [state, action, 0.5]
    samples.append(current_sample)

    # Hand the turn to the other player (counter is always 0 or 1 here).
    counter = 1 - counter

# Reward from X's point of view: +0.5 if X won, -0.5 if O won, and 0 for
# any other outcome.
xreward = {'X': 0.5, 'O': -0.5}.get(winner, 0)
# NOTE(review): this re-prompt looks like the body of a symbol-validation
# loop whose header is outside this view -- confirm its nesting when splicing.
player_symbol = input('Invalid symbol. Pick X or O: ')

# 'X' is player 0; anything else is treated as player 1.
player_num = 0 if player_symbol == 'X' else 1

winner = ''
counter = 0  # index into `symbols` of the side to move
symbols = ['X', 'O']

# Alternate turns until board.setSquare returns a non-empty result.
while winner == '':
    board.printBoard()

    if counter != player_num:
        # AI turn.
        # NOTE(review): the original indentation was lost; the announcement
        # and the blank line are assumed to belong to the AI branch only --
        # confirm against the original file.
        index = ai_pick(board, counter)
        print(symbols[counter] + ' chooses index ' + str(index))
        print()
    else:
        # Human turn: keep asking until the entry is an integer in 0..8
        # that refers to a square whose getSquare value is 0.
        index = input('Choose an index for a square: ')
        while not (
            is_int(index)
            and 0 <= int(index) <= 8
            and board.getSquare(int(index)) == 0
        ):
            index = input('Your entry was invalid. Choose again: ')
        index = int(index)

    winner = board.setSquare(index, symbols[counter])
    counter = (counter + 1) % 2

# Show the final position and announce the result ('D' marks a draw).
board.printBoard()
print('Draw!' if winner == 'D' else winner + ' won!')