Example #1
0
class ApproximateQLearningAgent:
    """Epsilon-greedy agent whose Q-values come from a ``LinearApproximator``.

    The agent holds a fixed tuple of two-component actions (selected by
    ``actionStrategy``) and delegates both greedy-action lookup and weight
    updates to the ``LinearApproximator`` built in ``__init__``.
    """

    def __init__(self, eps, alpha, discount, targetPos, actionStrategy="Absolute"):
        """Build the action set and the underlying approximator.

        Args:
            eps: Threshold compared against a uniform draw in [0, 1); a draw
                below it makes ``selectAction`` pick a random action.
            alpha: Forwarded unchanged to ``LinearApproximator.updateWeights``
                (presumably the learning rate -- confirm against that class).
            discount: Forwarded unchanged to
                ``LinearApproximator.updateWeights`` (presumably the discount
                factor -- confirm against that class).
            targetPos: Passed straight to the ``LinearApproximator``
                constructor; not interpreted here.
            actionStrategy: ``"Absolute"`` (6 actions, first component is
                never 0) or ``"Differential"`` (all 9 combinations of
                -1/0/1).

        Raises:
            ValueError: If ``actionStrategy`` is not one of the two names
                above.
        """
        if actionStrategy == "Absolute":
            self.actions = ((-1, -1), (-1, 0), (-1, 1), (1, -1), (1, 0), (1, 1))
        elif actionStrategy == "Differential":
            self.actions = ((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 0), (0, 1), (1, -1), (1, 0), (1, 1))
        else:
            # Fail loudly: printing and returning would hand the caller a
            # half-initialised agent that only breaks later with an unrelated
            # AttributeError.
            raise ValueError("Not valid action strategy: %r" % (actionStrategy,))
        self.QValues = LinearApproximator(self.actions, targetPos)
        self.eps = eps
        self.alpha = alpha
        self.discount = discount

    def getBestActionMaxQValue(self, state):
        """Return ``(bestAction, maxQValue)`` for ``state``.

        The pair is obtained from ``self.QValues.getBestActionMaxQValue`` and
        re-packed as a tuple.
        """
        bestAction, maxQValue = self.QValues.getBestActionMaxQValue(state)
        return (bestAction, maxQValue)

    def selectAction(self, state):
        """Pick an action for ``state`` epsilon-greedily.

        A uniform draw in [0, 1) below ``self.eps`` yields a uniformly random
        member of ``self.actions``; otherwise the approximator's best action
        for ``state`` is returned.
        """
        if random.random() < self.eps:
            # Exploration branch: every action is equally likely.
            return random.choice(self.actions)
        # Exploitation branch: only the action half of the pair is needed.
        return self.getBestActionMaxQValue(state)[0]

    def update(self, state, action, newState, reward):
        """Forward one observed transition to the approximator.

        Calls ``self.QValues.updateWeights`` with the transition plus the
        agent's stored ``discount`` and ``alpha``. Returns ``None``.
        """
        self.QValues.updateWeights(state, action, newState, reward, self.discount, self.alpha)
Example #2
0
 def __init__(self, eps, alpha, discount, targetPos, actionStrategy="Absolute"):
     """Build the action set and the underlying ``LinearApproximator``.

     Args:
         eps: Stored on the instance as ``self.eps``.
         alpha: Stored on the instance as ``self.alpha``.
         discount: Stored on the instance as ``self.discount``.
         targetPos: Passed straight to the ``LinearApproximator``
             constructor; not interpreted here.
         actionStrategy: ``"Absolute"`` (6 actions, first component is
             never 0) or ``"Differential"`` (all 9 combinations of
             -1/0/1).

     Raises:
         ValueError: If ``actionStrategy`` is not one of the two names
             above.
     """
     if actionStrategy == "Absolute":
         self.actions = ((-1, -1), (-1, 0), (-1, 1), (1, -1), (1, 0), (1, 1))
     elif actionStrategy == "Differential":
         self.actions = ((-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 0), (0, 1), (1, -1), (1, 0), (1, 1))
     else:
         # Fail loudly: printing and returning would leave the instance
         # without any of its attributes, so later calls would break with
         # an unrelated AttributeError.
         raise ValueError("Not valid action strategy: %r" % (actionStrategy,))
     self.QValues = LinearApproximator(self.actions, targetPos)
     self.eps = eps
     self.alpha = alpha
     self.discount = discount