Example #1
0
def TDUpdate(state, nextState, reward, w, eta=1e-1):
    """Return the weights after one temporal-difference step on a transition.

    ``state`` (and ``nextState`` when given) are scored with the current
    weights through ``util.logisticValue``. The gap between
    ``reward + V(nextState)`` and ``V(state)`` scales a step of size ``eta``
    along ``V(state) * (1 - V(state)) * features``.

    Args:
        state: State the transition starts from; passed to
            ``util.extractFeatures``.
        nextState: Successor state, or ``None`` when there is none, in which
            case the target is ``reward`` alone.
        reward: Reward observed on the transition.
        w: Current weights.
        eta: Step size.

    Returns:
        The updated weights, ``w + eta * tdError * gradient``.
    """
    phi = util.extractFeatures(state)
    prediction = util.logisticValue(w, phi)

    # TD error: target minus the current prediction. The successor's value is
    # only added when a successor exists; no discount factor is applied.
    tdError = reward - prediction
    if nextState is not None:
        tdError += util.logisticValue(w, util.extractFeatures(nextState))

    # prediction * (1 - prediction) * phi is the gradient of a sigmoid over a
    # linear score -- assumes util.logisticValue is sigmoid(w . phi); confirm
    # against util.
    sigmoidSlope = prediction * (1 - prediction)
    return w + eta * tdError * (sigmoidSlope * phi)
Example #2
0
def TDUpdate(state, nextState, reward, w, eta=1e-1):
    """Return the weights after one temporal-difference step on a transition.

    ``state`` (and ``nextState`` when given) are scored with the current
    weights through ``util.logisticValue``. The gap between
    ``reward + V(nextState)`` and ``V(state)`` scales a step of size ``eta``
    along ``V(state) * (1 - V(state)) * features``.

    Args:
        state: State the transition starts from; passed to
            ``util.extractFeatures``.
        nextState: Successor state, or ``None`` when there is none, in which
            case the target is ``reward`` alone.
        reward: Reward observed on the transition.
        w: Current weights.
        eta: Step size.

    Returns:
        The updated weights, ``w + eta * tdError * gradient``.
    """
    phi = util.extractFeatures(state)
    prediction = util.logisticValue(w, phi)

    # TD error: target minus the current prediction. The successor's value is
    # only added when a successor exists; no discount factor is applied.
    tdError = reward - prediction
    if nextState is not None:
        tdError += util.logisticValue(w, util.extractFeatures(nextState))

    # prediction * (1 - prediction) * phi is the gradient of a sigmoid over a
    # linear score -- assumes util.logisticValue is sigmoid(w . phi); confirm
    # against util.
    sigmoidSlope = prediction * (1 - prediction)
    return w + eta * tdError * (sigmoidSlope * phi)
Example #3
0
    def getValueRec(self, agent, game, card, depth, alpha, beta):
        """Score playing ``card`` with a depth-limited alpha-beta search.

        ``card`` is played by the opponent of ``agent`` on a deep copy of
        ``game`` (the ``game`` argument itself is left untouched), after which
        ``agent`` is the player choosing the next card.

        Args:
            agent: Player who moves after ``card``; the card itself is played
                by ``int(not agent)``.
            game: Game position before ``card`` is played.
            card: Move to apply before evaluating.
            depth: Remaining search depth; it drops by one every time the
                card just played belonged to ``self.playerNum``.
            alpha: Lower bound used to cut off the minimising branch.
            beta: Upper bound used to cut off the maximising branch.

        Returns:
            1 if the game is over and ``self.playerNum`` won, 0 if it is over
            and ``self.playerNum`` lost, the ``util.logisticValue`` estimate
            of the position when ``depth`` is 0, and otherwise the max (when
            ``agent`` is ``self.playerNum``) or min (otherwise) over the
            values of the available cards. With no available cards the result
            is ``-inf`` or ``+inf`` respectively.
        """
        mover = int(not agent)
        sim = copy.deepcopy(game)
        sim.playCard(mover, card)
        if sim.roundOver():
            sim.endRound()

        # Leaves: a decided game, or the depth budget is exhausted.
        # NOTE(review): a finished game that is neither a win nor a loss for
        # self.playerNum falls through to the checks below -- confirm whether
        # that can happen.
        if sim.gameOver() and sim.isWinner(self.playerNum):
            return 1
        if sim.gameOver() and sim.isLoser(self.playerNum):
            return 0
        if depth == 0:
            # Estimate from ``agent``'s view of the position, using the weight
            # vector that matches its current role.
            features = util.extractFeatures(sim.getState(agent))
            weights = self.w_atk if agent == sim.attacker else self.w_def
            return util.logisticValue(weights, features)

        # One unit of depth is spent per card played by self.playerNum.
        if mover == self.playerNum:
            depth -= 1

        if agent == sim.attacker:
            options = sim.getAttackOptions(agent)
        else:
            options = sim.getDefendOptions(agent)

        if agent == self.playerNum:
            # Maximising side: stop as soon as the value reaches beta.
            best = float('-inf')
            for option in options:
                childValue = self.getValueRec(mover, sim, option, depth,
                                              alpha, beta)
                best = max(best, childValue)
                if best >= beta:
                    return best
                alpha = max(alpha, best)
            return best

        # Minimising side: stop as soon as the value drops to alpha.
        worst = float('+inf')
        for option in options:
            childValue = self.getValueRec(mover, sim, option, depth, alpha,
                                          beta)
            worst = min(worst, childValue)
            if worst <= alpha:
                return worst
            beta = min(beta, worst)
        return worst
Example #4
0
    def getValue(self, card, game):
        """Estimate the value of playing ``card`` from the current position.

        The move is applied to a deep copy of ``game`` (the ``game`` argument
        itself is left untouched) and the resulting state, as seen by
        ``self.playerNum``, is scored with ``util.logisticValue``.

        Args:
            card: Move to evaluate; may be ``dk.Durak.END_ROUND``.
            game: Game position before the move.

        Returns:
            The ``util.logisticValue`` score of the resulting state under the
            attack or defence weights chosen below.
        """
        sim = copy.deepcopy(game)
        sim.playCard(self.playerNum, card)
        state = sim.getState(self.playerNum)

        if not state['isAttacker']:
            # Not attacking: score with the defence weights, with the table
            # cards added to the hand first.
            # NOTE(review): presumably models picking up the table -- confirm.
            weights = self.w_def
            state['hand'].addCards(state['table'].getCards())
        elif card == dk.Durak.END_ROUND:
            # Attacker ending the round: the state is flagged as non-attacker
            # and scored with the defence weights.
            state['isAttacker'] = False
            weights = self.w_def
        else:
            weights = self.w_atk

        return util.logisticValue(weights, util.extractFeatures(state))
Example #5
0
    def getValue(self, card, game):
        """Estimate the value of playing ``card`` from the current position.

        The move is applied to a deep copy of ``game`` (the ``game`` argument
        itself is left untouched) and the resulting state, as seen by
        ``self.playerNum``, is scored with ``util.logisticValue``.

        Args:
            card: Move to evaluate; may be ``dk.Durak.END_ROUND``.
            game: Game position before the move.

        Returns:
            The ``util.logisticValue`` score of the resulting state under the
            attack or defence weights chosen below.
        """
        sim = copy.deepcopy(game)
        sim.playCard(self.playerNum, card)
        state = sim.getState(self.playerNum)

        if not state['isAttacker']:
            # Not attacking: score with the defence weights, with the table
            # cards added to the hand first.
            # NOTE(review): presumably models picking up the table -- confirm.
            weights = self.w_def
            state['hand'].addCards(state['table'].getCards())
        elif card == dk.Durak.END_ROUND:
            # Attacker ending the round: the state is flagged as non-attacker
            # and scored with the defence weights.
            state['isAttacker'] = False
            weights = self.w_def
        else:
            weights = self.w_atk

        return util.logisticValue(weights, util.extractFeatures(state))
Example #6
0
    def getValueRec(self, agent, game, card, depth, alpha, beta):
        """Score playing ``card`` with a depth-limited alpha-beta search.

        ``card`` is played by the opponent of ``agent`` on a deep copy of
        ``game`` (the ``game`` argument itself is left untouched), after which
        ``agent`` is the player choosing the next card.

        Args:
            agent: Player who moves after ``card``; the card itself is played
                by ``int(not agent)``.
            game: Game position before ``card`` is played.
            card: Move to apply before evaluating.
            depth: Remaining search depth; it drops by one every time the
                card just played belonged to ``self.playerNum``.
            alpha: Lower bound used to cut off the minimising branch.
            beta: Upper bound used to cut off the maximising branch.

        Returns:
            1 if the game is over and ``self.playerNum`` won, 0 if it is over
            and ``self.playerNum`` lost, the ``util.logisticValue`` estimate
            of the position when ``depth`` is 0, and otherwise the max (when
            ``agent`` is ``self.playerNum``) or min (otherwise) over the
            values of the available cards. With no available cards the result
            is ``-inf`` or ``+inf`` respectively.
        """
        mover = int(not agent)
        sim = copy.deepcopy(game)
        sim.playCard(mover, card)
        if sim.roundOver():
            sim.endRound()

        # Leaves: a decided game, or the depth budget is exhausted.
        # NOTE(review): a finished game that is neither a win nor a loss for
        # self.playerNum falls through to the checks below -- confirm whether
        # that can happen.
        if sim.gameOver() and sim.isWinner(self.playerNum):
            return 1
        if sim.gameOver() and sim.isLoser(self.playerNum):
            return 0
        if depth == 0:
            # Estimate from ``agent``'s view of the position, using the weight
            # vector that matches its current role.
            features = util.extractFeatures(sim.getState(agent))
            weights = self.w_atk if agent == sim.attacker else self.w_def
            return util.logisticValue(weights, features)

        # One unit of depth is spent per card played by self.playerNum.
        if mover == self.playerNum:
            depth -= 1

        if agent == sim.attacker:
            options = sim.getAttackOptions(agent)
        else:
            options = sim.getDefendOptions(agent)

        if agent == self.playerNum:
            # Maximising side: stop as soon as the value reaches beta.
            best = float('-inf')
            for option in options:
                childValue = self.getValueRec(mover, sim, option, depth,
                                              alpha, beta)
                best = max(best, childValue)
                if best >= beta:
                    return best
                alpha = max(alpha, best)
            return best

        # Minimising side: stop as soon as the value drops to alpha.
        worst = float('+inf')
        for option in options:
            childValue = self.getValueRec(mover, sim, option, depth, alpha,
                                          beta)
            worst = min(worst, childValue)
            if worst <= alpha:
                return worst
            beta = min(beta, worst)
        return worst