def computeReward(self, state, otherPlayerStates=None):
    """Return the reward for ``state``; it is non-zero only once the game is over.

    ``state`` and every entry of ``otherPlayerStates`` are 11-tuples laid out
    as (kingdom, deck, hand, drawPile, discardPile, phase, turn, buys,
    actions, money, cardsPlayed).

    Args:
        state: this player's state tuple.
        otherPlayerStates: state tuples of the other players; ``None`` or an
            empty sequence selects the single-player reward.

    Returns:
        0 while ``self.endOfGame(state)`` is false.
        Multiplayer: 100 if this deck holds strictly more victory points than
        every other deck, -100 if some other deck holds strictly more, and 0
        on a tie for first place.
        Single player: -1 when ``turn == self.maxTurns``, else ``100 - turn``.
    """
    kingdom, deck, hand, drawPile, discardPile, phase, turn, buys, actions, money, cardsPlayed = state

    if not self.endOfGame(state):
        return 0

    if otherPlayerStates:
        #MULTIPLAYER GAME, MOST VPOINTS WINS
        myVPoints = cardUtils.computeVictoryPoints(deck)
        # Compare against the true best opponent score. Seeding the maximum
        # with 0 would treat opponents with negative totals as if they had
        # scored 0 and could report a loss for the player with most points.
        # Index 1 of a state tuple is the deck.
        maxVPoints = max(cardUtils.computeVictoryPoints(otherPlayerState[1])
                         for otherPlayerState in otherPlayerStates)
        if maxVPoints > myVPoints:
            return -100
        if myVPoints > maxVPoints:
            return 100
        return 0

    #single player game
    if turn == self.maxTurns:
        return -1  #reward is -1 if we go over the number of turns
    return 100 - turn  #the fewer turns, the higher the reward
def deepFeatureExtractor(state, otherPlayerStates=[], max_turns=100):
    """Encode ``state`` as a dense numpy vector of indicator slots.

    ``state`` is an 11-tuple (kingdom, deck, hand, drawPile, discardPile,
    phase, turn, buys, actions, money, cardsPlayed). The vector length comes
    from ``computeDeepFeatureLength(12, kingdom)`` and is filled in order:

      1. ``max_turns`` slots: the slot at index ``turn`` is set when
         ``turn < max_turns``.
      2. for each card ID in ``kingdom``, a stride of 12 slots: the slot at
         ``deck[cardID]`` is set when that count is ``<= 12``.
      3. a stride of 12 slots indexed by
         ``cardUtils.getNumProvinceCards(kingdom)``.
      4. one slot holding own victory points minus the best opponent score
         (opponent scores are floored at 0).

    Returns:
        The filled numpy array.
    """
    kingdom, deck, hand, drawPile, discardPile, phase, turn, buys, actions, money, cardsPlayed = state
    slotsPerSegment = 12
    vector = np.zeros(computeDeepFeatureLength(slotsPerSegment, kingdom))

    ownPoints = cardUtils.computeVictoryPoints(deck)
    rivalPoints = [cardUtils.computeVPointsFromState(rival) for rival in otherPlayerStates]
    # The leading 0 keeps the baseline at zero when there are no opponents
    # or when every opponent score is below zero.
    pointsLead = ownPoints - max([0] + rivalPoints)

    #TURN
    if turn < max_turns:
        vector[turn] = 1
    offset = max_turns

    # NUMBER OF EACH CARD
    for cardID in kingdom:
        copies = deck[cardID]
        # NOTE(review): a count equal to the stride lands on the first slot
        # of the following segment -- confirm this overlap is intended.
        if copies <= slotsPerSegment:
            vector[offset + copies] = 1
        offset += slotsPerSegment

    #NUM PROVINCES AND VPOINTSDIF
    vector[offset + cardUtils.getNumProvinceCards(kingdom)] = 1
    vector[offset + slotsPerSegment] = pointsLead
    return vector
def learningComputeReward(state, otherPlayerStates=[]):
    """Return a reward based on the victory-point margin over the opponents.

    ``state`` and each entry of ``otherPlayerStates`` are 11-tuples
    (kingdom, deck, hand, drawPile, discardPile, phase, turn, buys, actions,
    money, cardsPlayed). The opponent baseline is the largest opponent
    score, floored at 0 (so it is 0 when there are no opponents).

    Returns:
        ``margin + 50`` when this player is strictly ahead of the baseline,
        ``margin - 50`` when strictly behind, and 0 when level, where
        ``margin`` is own victory points minus the baseline.
    """
    kingdom, deck, hand, drawPile, discardPile, phase, turn, buys, actions, money, cardsPlayed = state
    ownPoints = cardUtils.computeVictoryPoints(deck)

    # Unpacking all eleven fields keeps the same shape check on every
    # opponent state; only the deck is used.
    rivalPoints = [
        cardUtils.computeVictoryPoints(rivalDeck)
        for (_kingdom, rivalDeck, _hand, _drawPile, _discardPile, _phase,
             _turn, _buys, _actions, _money, _cardsPlayed) in otherPlayerStates
    ]
    baseline = max([0] + rivalPoints)
    margin = ownPoints - baseline

    if baseline > ownPoints:
        return margin - 50
    if ownPoints > baseline:
        return margin + 50
    return 0
def printGameHistory(gameHistory, mdp, allPlayerStates=[], playerID=None): #TODO: readability: /3 , * 3 ??? print "player states: ", allPlayerStates for i in range(len(gameHistory) / 3): state = gameHistory[i * 3 + 2] kingdom, deck, hand, drawPile, discardPile, phase, turn, buys, actions, money, cardsPlayed = state print "Turn:", turn, if phase == "buy": print "Money:", money, action = gameHistory[i * 3] if action[0] == 'buy': buy, buyCardID = action if buyCardID == -1: cardName = "None" else: cardName = cardUtils.getCardFromID(buyCardID).cardName print "Action:", buy, cardName else: print action print "Deck after:", cardUtils.printDeck(deck) print "Drew hand: ", hand print "###############################################" if mdp.endOfGame(state): print "end of game" for ID in range(len(allPlayerStates)): kingdom1, deck1, hand1, drawPile1, discardPile1, phase1, turn1, buys1, actions1, money1, cardsPlayed1 = allPlayerStates[ID] otherPlayerStates = list(allPlayerStates) otherPlayerStates.remove(allPlayerStates[ID]) print "Player ", str(ID), "Game Reward: ", str(mdp.computeReward(allPlayerStates[ID], otherPlayerStates)) print "Number of Victory Points:", str(cardUtils.computeVictoryPoints(deck1)) print "Number of Turns:", turn
def backpropagateFeatureExtractor(state):
    """Return sparse indicator features describing the deck and the turn.

    Args:
        state: 11-tuple (kingdom, deck, hand, drawPile, discardPile, phase,
            turn, buys, actions, money, cardsPlayed). ``deck`` is iterated
            for card IDs and indexed by card ID for counts.

    Returns:
        A list of ``(featureName, 1)`` pairs: one per card ID in the deck
        (count combined with ``turn // 3``), one for the deck value combined
        with the turn, and one for the number of provinces in the deck
        combined with the turn.
    """
    kingdom, deck, hand, drawPile, discardPile, phase, turn, buys, actions, money, cardsPlayed = state
    features = []

    #NUMBER OF EACH CARD AND TURN
    # Floor division makes the three-turn bucketing explicit instead of
    # depending on the interpreter's integer-division rules.
    turnBucket = str(turn // 3)
    for cardID in deck:
        features.append(("numOfCardsInDeckOfType" + str(cardID) + "=" + str(deck[cardID]) + "turndiv3:" + turnBucket, 1))

    #DECK VALUE AND TURN
    deckValue = cardUtils.computeDeckValue(deck)
    features.append(("deckValue:" + str(deckValue) + "turn:" + str(turn), 1))

    #NUM PROVINCES AND TURN
    # The key must carry the province count; embedding the whole deck would
    # produce a distinct feature for almost every deck composition.
    numProvinces = cardUtils.getNumProvinceCards(deck)
    features.append(("provincesInDeck:" + str(numProvinces) + "turn:" + str(turn), 1))
    return features
def monishaFeatureExtractor(state, otherPlayerStates=None):
    """Return sparse features: card counts, gold concentration and point lead.

    Args:
        state: 11-tuple (kingdom, deck, hand, drawPile, discardPile, phase,
            turn, buys, actions, money, cardsPlayed).
        otherPlayerStates: states of the other players; ``None`` or empty
            means the opponent baseline is 0.

    Returns:
        A list of ``(featureName, weight)`` pairs. Card-count and
        gold-concentration features are bucketed by ``turn // 3`` and have
        weight 1; the combined point-lead / province feature has weight 3.
    """
    kingdom, deck, hand, drawPile, discardPile, phase, turn, buys, actions, money, cardsPlayed = state
    features = []
    # Floor division makes the three-turn bucketing explicit.
    turnBucket = str(turn // 3)

    # Lead over the best opponent; the baseline never drops below 0.
    vPoints = cardUtils.computeVictoryPoints(deck)
    maxOtherPlayerVPoints = 0
    for otherState in otherPlayerStates or []:
        playerVPoints = cardUtils.computeVPointsFromState(otherState)
        if playerVPoints > maxOtherPlayerVPoints:
            maxOtherPlayerVPoints = playerVPoints
    vPointsDif = vPoints - maxOtherPlayerVPoints

    #NUMBER OF EACH CARD AND TURN
    for cardID in deck:
        features.append(("numOfCardsInDeckOfType" + str(cardID) + "=" + str(deck[cardID]) + "turndiv3:" + turnBucket, 1))

    #DECK VALUE AND TURN
    deckValue = cardUtils.computeDeckValue(deck)
    # NOTE(review): card ID 2 is presumably Gold (worth 3 each) -- confirm.
    if deckValue:
        goldConcentration = (deck[2] * 3) / (deckValue + 0.0)
    else:
        # A deck value of zero would otherwise raise ZeroDivisionError.
        goldConcentration = 0.0
    features.append(("goldConcentration:" + str(goldConcentration) + "turndiv3:" + turnBucket, 1))

    #NUM PROVINCES AND TURN
    numProvinces = cardUtils.getNumProvinceCards(kingdom)
    features.append(("vPointsDif" + str(vPointsDif) + "provinces:" + str(numProvinces), 3))
    return features
def newestFeatureExtractor(state, otherPlayerStates=None):
    """Return sparse features: card counts, average treasure value, point lead.

    Args:
        state: 11-tuple (kingdom, deck, hand, drawPile, discardPile, phase,
            turn, buys, actions, money, cardsPlayed).
        otherPlayerStates: states of the other players; ``None`` or empty
            means the opponent baseline is 0.

    Returns:
        A list of ``(featureName, weight)`` pairs. Card-count and
        average-money features are bucketed by ``turn // 3`` and have
        weight 1; the combined point-lead / province feature has weight 3.
    """
    kingdom, deck, hand, drawPile, discardPile, phase, turn, buys, actions, money, cardsPlayed = state
    features = []
    # Floor division makes the three-turn bucketing explicit.
    turnBucket = str(turn // 3)

    # Lead over the best opponent; the baseline never drops below 0.
    vPoints = cardUtils.computeVictoryPoints(deck)
    maxOtherPlayerVPoints = 0
    for otherState in otherPlayerStates or []:
        playerVPoints = cardUtils.computeVPointsFromState(otherState)
        if playerVPoints > maxOtherPlayerVPoints:
            maxOtherPlayerVPoints = playerVPoints
    vPointsDif = vPoints - maxOtherPlayerVPoints

    #NUMBER OF EACH CARD AND TURN
    for cardID in deck:
        features.append(("numOfCardsInDeckOfType" + str(cardID) + "=" + str(deck[cardID]) + "turndiv3:" + turnBucket, 1))

    #AVERAGE TREASURE VALUE AND TURN
    # Cards 0, 1 and 2 are weighted 1, 2 and 3 respectively.
    treasureCount = deck[0] + deck[1] + deck[2]
    if treasureCount:
        averageMoneyValue = (deck[0] + deck[1] * 2 + deck[2] * 3) / (treasureCount + 0.0)
    else:
        # Without any of these cards the average would divide by zero.
        averageMoneyValue = 0.0
    roundedAverageMoneyValue = round(averageMoneyValue, 1)
    features.append(("averageMoneyValue:" + str(roundedAverageMoneyValue) + "turndiv3:" + turnBucket, 1))

    #NUM PROVINCES AND VPOINTSDIF
    numProvinces = cardUtils.getNumProvinceCards(kingdom)
    features.append(("vPointsDif" + str(vPointsDif) + "provinces:" + str(numProvinces), 3))
    return features
def gameStageFeatureExtractor(state, otherPlayerStates=None):
    """Return sparse features, each tagged with a coarse game stage.

    The stage is "Start" until either this deck or the first opponent's deck
    holds a card with ID 5, then "Middle", and "End" once the two decks
    together hold more than 6 of that card. Only the first entry of
    ``otherPlayerStates`` is consulted for the stage; with no opponents the
    opponent count is taken as 0.

    Args:
        state: 11-tuple (kingdom, deck, hand, drawPile, discardPile, phase,
            turn, buys, actions, money, cardsPlayed).
        otherPlayerStates: states of the other players; may be ``None`` or
            empty.

    Returns:
        A list of ``(featureName, 1)`` pairs: average money value, provinces
        in deck, one count feature per card ID in the deck, and the
        victory-point lead over the best opponent (baseline floored at 0).
    """
    kingdom, deck, hand, drawPile, discardPile, phase, turn, buys, actions, money, cardsPlayed = state
    otherPlayerStates = otherPlayerStates or []
    features = []

    #Compute "Stage" of game
    # NOTE(review): card ID 5 is presumably Province -- confirm.
    ownStageCards = deck[5]
    # Index 1 of a state tuple is the deck. Falling back to 0 avoids an
    # IndexError when the function is called with no opponents.
    oppStageCards = otherPlayerStates[0][1][5] if otherPlayerStates else 0
    gameStage = "Start"
    if ownStageCards > 0 or oppStageCards > 0:
        gameStage = "Middle"
    if ownStageCards + oppStageCards > 6:
        gameStage = "End"

    deckValue = cardUtils.computeDeckValue(deck)
    treasureCount = 0
    for cardID in deck:
        if "treasure" in cardUtils.getCardFromID(cardID).cardType:
            treasureCount += deck[cardID]
    if treasureCount:
        averageMoneyValue = (deckValue + 0.0) / treasureCount
    else:
        # A deck without treasure cards would otherwise divide by zero.
        averageMoneyValue = 0.0
    roundedMoneyValue = round(averageMoneyValue, 1)
    features.append(("AverageMoneyValue:" + str(roundedMoneyValue) + "Stage" + gameStage, 1))

    numProvinces = cardUtils.getNumProvinceCards(deck)
    features.append(("provincesInDeck:" + str(numProvinces) + "Stage:" + gameStage, 1))

    for cardID in deck:
        features.append(("numOfCardsInDeckOfType" + str(cardID) + "=" + str(deck[cardID]) + "Stage:" + gameStage, 1))

    vPoints = cardUtils.computeVictoryPoints(deck)
    maxOtherPlayerVPoints = 0
    for otherState in otherPlayerStates:
        playerVPoints = cardUtils.computeVPointsFromState(otherState)
        if playerVPoints > maxOtherPlayerVPoints:
            maxOtherPlayerVPoints = playerVPoints
    vPointsDif = vPoints - maxOtherPlayerVPoints
    features.append(("vPointsDif" + str(vPointsDif) + "Stage" + gameStage, 1))
    return features