def runGame(p1, p2):
    """Play one full game between ``p1`` and ``p2`` and return the winner's name.

    Both players are reset, given a starting hand from a fresh dealer, and then
    play rounds until ``GameState.judgeGameOver`` reports one of them as the
    winner. After every undecided round each player receives one new random
    card.

    Args:
        p1: First player. Must provide ``resetForNewGame()``, ``pickCard()``,
            and the attributes ``cards``, ``playedCard``, ``accumulatedCards``
            and ``name``.
        p2: Second player, with the same interface as ``p1``.

    Returns:
        ``p1.name`` if ``p1`` wins the game, otherwise ``p2.name``.
    """
    # NOTE(review): this file also contains a later ``runGame(p1, p2, mute)``.
    # If both definitions live in the same module, the later one rebinds the
    # name and this version becomes unreachable -- confirm which is intended.
    p1.resetForNewGame()
    p2.resetForNewGame()

    # The argument 5 is presumably the hand size -- TODO confirm against Dealer.
    d1 = Dealer(5)
    # Each player gets the first element of a separate dealCards() result.
    p1.cards = d1.dealCards()[0]
    p2.cards = d1.dealCards()[0]
    gameState = GameState(p1, p2)

    while True:
        p1.pickCard()
        p2.pickCard()
        roundWinner = gameState.judgeRound(p1, p2)

        # Credit the round winner: playedCard[1] is used as the key into the
        # winner's tally (presumably the card's element -- verify against the
        # card representation).
        if roundWinner == p1:
            p1.accumulatedCards[p1.playedCard[1]] += 1
        if roundWinner == p2:
            p2.accumulatedCards[p2.playedCard[1]] += 1

        # Evaluate the end-of-game condition once per round and reuse it.
        gameWinner = gameState.judgeGameOver(p1, p2)
        if gameWinner == p1:
            return p1.name
        if gameWinner == p2:
            return p2.name

        # Nobody has won yet: each player draws one card for the next round.
        p1.cards.append(d1.generateRandomCard())
        p2.cards.append(d1.generateRandomCard())
def runGame(p1, p2, mute):
    """Play one game in which ``p1`` acts as a learning agent against ``p2``.

    Each round ``p1`` is updated with the current state and running score,
    chooses an action via ``doAction`` and plays it, while ``p2`` picks its own
    card. The reward returned by ``GameState.getRewards`` is added to the
    running score after every round. When the game ends, ``p1`` receives one
    final ``update`` call before the result is returned.

    Args:
        p1: The learning agent. Must provide ``resetForNewGame()``,
            ``update(gameState, score)``, ``doAction(gameState)``,
            ``pickCard(action)``, ``printEpisodeInfo()`` and the attributes
            ``cards``, ``playedCard``, ``accumulatedCards`` and ``name``.
        p2: The opponent. Must provide ``resetForNewGame()``, ``pickCard()``
            and the same card attributes as ``p1``.
        mute: When truthy, the end-of-game message is not printed.

    Returns:
        A tuple ``(aqlearnScore, label)`` where ``aqlearnScore`` is the sum of
        the per-round rewards and ``label`` is ``"aql"`` if ``p1`` won the game
        or ``"greedy"`` if ``p2`` won.
    """
    # NOTE(review): this file also contains an earlier ``runGame(p1, p2)``.
    # If both definitions live in the same module, this one replaces it --
    # confirm that is intended.
    p1.resetForNewGame()
    p2.resetForNewGame()

    # The argument 5 is presumably the hand size -- TODO confirm against Dealer.
    d1 = Dealer(5)
    # Each player gets the first element of a separate dealCards() result.
    p1.cards = d1.dealCards()[0]
    p2.cards = d1.dealCards()[0]
    gameState = GameState(p1, p2)
    aqlearnScore = 0

    while True:
        # Agent step: learn from the state reached so far, then choose and
        # play an action for this round.
        p1.update(gameState, aqlearnScore)
        action = p1.doAction(gameState)
        p1.pickCard(action)
        p2.pickCard()

        roundWinner = gameState.judgeRound(p1, p2)

        # Credit the round winner: playedCard[1] is used as the key into the
        # winner's tally (presumably the card's element -- verify against the
        # card representation).
        if roundWinner == p1:
            p1.accumulatedCards[p1.playedCard[1]] += 1
        if roundWinner == p2:
            p2.accumulatedCards[p2.playedCard[1]] += 1

        # Accumulate the reward for the transition that just happened.
        p1TransitionalReward = gameState.getRewards(p1, p2)
        aqlearnScore += p1TransitionalReward

        # Evaluate the end-of-game condition once per round and reuse it.
        gameWinner = gameState.judgeGameOver(p1, p2)
        if gameWinner == p1:
            # Final update so the agent sees the terminal score.
            p1.update(gameState, aqlearnScore)
            if not mute:
                print(bcolors.OKBLUE + "Game Over!", p1.name + " wins!" + bcolors.ENDC)
                # NOTE(review): nesting reconstructed from whitespace-collapsed
                # source; confirm printEpisodeInfo() belongs under `not mute`.
                p1.printEpisodeInfo()
            return (aqlearnScore, "aql")
        if gameWinner == p2:
            # Final update so the agent sees the terminal score.
            p1.update(gameState, aqlearnScore)
            if not mute:
                print(bcolors.FAIL + "Game Over!", p2.name + " wins!" + bcolors.ENDC)
                # NOTE(review): nesting reconstructed from whitespace-collapsed
                # source; confirm printEpisodeInfo() belongs under `not mute`.
                p1.printEpisodeInfo()
            return (aqlearnScore, "greedy")

        # Nobody has won yet: each player draws one card for the next round.
        p1.cards.append(d1.generateRandomCard())
        p2.cards.append(d1.generateRandomCard())