def TDUpdate(state, nextState, reward, w, eta=1e-1):
    """Return the weights after one temporal-difference step on a transition.

    The estimate for ``state`` is ``util.logisticValue(w, features)``. The
    error is ``reward - estimate``, plus the estimate for ``nextState`` when
    one is supplied (the successor value is added without any discount).

    Args:
        state: State whose value estimate is being corrected.
        nextState: Successor state, or ``None`` when there is none; the
            error is then ``reward - estimate`` alone.
        reward: Reward observed on the transition.
        w: Weights handed to ``util.logisticValue``.
            NOTE(review): presumably a numeric vector aligned with the output
            of ``util.extractFeatures`` -- confirm against ``util``.
        eta: Step size applied to the update.

    Returns:
        ``w`` plus ``eta * error * gradient``.
    """
    phi = util.extractFeatures(state)
    estimate = util.logisticValue(w, phi)

    tdError = reward - estimate
    if nextState is not None:
        # Bootstrap from the successor using the same (pre-update) weights.
        tdError += util.logisticValue(w, util.extractFeatures(nextState))

    # estimate * (1 - estimate) is the slope of a logistic sigmoid at its
    # output; assumes util.logisticValue is such a sigmoid -- TODO confirm.
    slope = estimate * (1 - estimate)
    return w + (eta * tdError) * (slope * phi)
def getValueRec(self, agent, game, card, depth, alpha, beta):
    """Score ``card`` by alpha-beta search, from ``self.playerNum``'s side.

    ``card`` is played by the *other* agent (``int(not agent)``) on a deep
    copy of ``game``; ``agent`` is the one who has to answer it. The caller's
    ``game`` object is never modified.

    Args:
        agent: Player who moves next. Computing the opponent as
            ``int(not agent)`` assumes two players numbered 0 and 1.
        game: Game position before ``card`` is played.
        card: Move made by the other agent.
        depth: Remaining search budget. It drops by one each time the move
            just applied was made by ``self.playerNum``.
        alpha: Best value the maximiser is already guaranteed.
        beta: Best value the minimiser is already guaranteed.

    Returns:
        ``1`` if the game is over and ``self.playerNum`` won, ``0`` if it is
        over and ``self.playerNum`` lost. When ``depth`` is 0, the value from
        ``util.logisticValue`` for the position as seen by ``agent``.
        Otherwise the minimax value of the replies. With no replies
        available, the result is ``-inf`` (maximiser) or ``+inf``
        (minimiser).
    """
    mover = int(not agent)
    sim = copy.deepcopy(game)
    sim.playCard(mover, card)
    if sim.roundOver():
        sim.endRound()

    me = self.playerNum
    if sim.gameOver() and sim.isWinner(me):
        return 1
    if sim.gameOver() and sim.isLoser(me):
        return 0
    if depth == 0:
        # Out of budget: fall back to the learned evaluation for whichever
        # role ``agent`` currently holds.
        features = util.extractFeatures(sim.getState(agent))
        weights = self.w_atk if agent == sim.attacker else self.w_def
        return util.logisticValue(weights, features)

    if mover == me:
        depth -= 1

    if agent == sim.attacker:
        options = sim.getAttackOptions(agent)
    else:
        options = sim.getDefendOptions(agent)

    maximizing = agent == me
    best = float('-inf') if maximizing else float('+inf')
    for option in options:
        child = self.getValueRec(mover, sim, option, depth, alpha, beta)
        if maximizing:
            best = max(best, child)
            if best >= beta:
                return best  # the minimiser above will never allow this line
            alpha = max(alpha, best)
        else:
            best = min(best, child)
            if best <= alpha:
                return best  # the maximiser above already has something better
            beta = min(beta, best)
    return best
# getValue(card, game): estimate how good the position is for this agent
# right after it plays `card`.
#
# Steps, as written:
#   * deep-copies `game`, so the caller's game object is left untouched, and
#     plays `card` for `self.playerNum` on the copy;
#   * reads that player's state from the copy and picks the weights:
#       - attacker playing dk.Durak.END_ROUND -> state['isAttacker'] is set
#         to False and `self.w_def` is used;
#       - attacker playing any other card     -> `self.w_atk`;
#       - not the attacker                    -> `self.w_def`;
#   * adds the table's cards to state['hand'] (see the note below);
#   * returns util.logisticValue(weights, util.extractFeatures(state)).
#
# NOTE(review): in this flattened layout the nesting of
# `state['hand'].addCards(...)` cannot be determined -- it may belong to the
# outer `else:` (non-attacker) branch only, or run for every call. Confirm
# against the original indentation before reformatting this function.
def getValue(self, card, game): gameClone = copy.deepcopy(game) gameClone.playCard(self.playerNum, card) state = gameClone.getState(self.playerNum) if state['isAttacker']: if card == dk.Durak.END_ROUND: state['isAttacker'] = False weights = self.w_def else: weights = self.w_atk else: weights = self.w_def state['hand'].addCards(state['table'].getCards()) features = util.extractFeatures(state) return util.logisticValue(weights, features)
def getValueRec(self, agent, game, card, depth, alpha, beta):
    """Return the alpha-beta value of the other agent playing ``card``.

    The move is applied to a deep copy of ``game`` on behalf of
    ``int(not agent)``, after which ``agent`` is the player to reply. Values
    are expressed from ``self.playerNum``'s side: that player maximises and
    the opponent minimises.

    Args:
        agent: Player to move after ``card`` has been played. The opponent
            is derived as ``int(not agent)``, which assumes players 0 and 1.
        game: Position before the move; it is copied, never mutated.
        card: The move being evaluated.
        depth: Search budget, reduced by one after each move made by
            ``self.playerNum``.
        alpha: Lower bound already secured for the maximiser.
        beta: Upper bound already secured for the minimiser.

    Returns:
        ``1`` for a finished game won by ``self.playerNum``, ``0`` for a
        finished game lost by it. At ``depth == 0`` the result of
        ``util.logisticValue`` on the position seen by ``agent``. Otherwise
        the best reply value, or ``+inf`` / ``-inf`` if there are no replies
        for the minimiser / maximiser.
    """
    otherAgent = int(not agent)
    board = copy.deepcopy(game)
    board.playCard(otherAgent, card)
    if board.roundOver():
        board.endRound()

    # Terminal positions are scored exactly.
    if board.gameOver() and board.isWinner(self.playerNum):
        return 1
    if board.gameOver() and board.isLoser(self.playerNum):
        return 0

    if depth == 0:
        # Search budget exhausted: use the learned evaluation for the role
        # that ``agent`` holds in this position.
        snapshot = board.getState(agent)
        phi = util.extractFeatures(snapshot)
        if agent == board.attacker:
            return util.logisticValue(self.w_atk, phi)
        return util.logisticValue(self.w_def, phi)

    # One unit of depth is consumed per move made by this player.
    remaining = depth - 1 if otherAgent == self.playerNum else depth

    if agent == board.attacker:
        moves = board.getAttackOptions(agent)
    else:
        moves = board.getDefendOptions(agent)

    if agent != self.playerNum:
        # Opponent to move: minimise.
        lowest = float('+inf')
        for move in moves:
            reply = self.getValueRec(
                otherAgent, board, move, remaining, alpha, beta)
            if reply < lowest:
                lowest = reply
            if lowest <= alpha:
                return lowest
            if lowest < beta:
                beta = lowest
        return lowest

    # This player to move: maximise.
    highest = float('-inf')
    for move in moves:
        reply = self.getValueRec(
            otherAgent, board, move, remaining, alpha, beta)
        if reply > highest:
            highest = reply
        if highest >= beta:
            return highest
        if highest > alpha:
            alpha = highest
    return highest