class ApproximateQLearningAgent:
    """Epsilon-greedy Q-learning agent whose Q-values are estimated by a
    linear function approximator (``LinearApproximator``, defined elsewhere
    in this project).

    Parameters
    ----------
    eps : float
        Exploration probability for epsilon-greedy action selection.
    alpha : float
        Learning rate forwarded to the approximator's weight update.
    discount : float
        Discount factor (gamma) for future rewards.
    targetPos :
        Target position forwarded to ``LinearApproximator`` — presumably
        used by its feature extraction; confirm against that class.
    actionStrategy : str
        ``"Absolute"`` or ``"Differential"``; selects the discrete action set.

    Raises
    ------
    ValueError
        If ``actionStrategy`` is not one of the supported names.
    """

    # (x, y) action tuples.  NOTE(review): "Absolute" omits the (0, *)
    # column, while "Differential" is the full 3x3 grid including the
    # no-op (0, 0); the asymmetry looks intentional but is worth confirming.
    _ACTION_SETS = {
        "Absolute": ((-1, -1), (-1, 0), (-1, 1),
                     (1, -1), (1, 0), (1, 1)),
        "Differential": ((-1, -1), (-1, 0), (-1, 1),
                         (0, -1), (0, 0), (0, 1),
                         (1, -1), (1, 0), (1, 1)),
    }

    def __init__(self, eps, alpha, discount, targetPos, actionStrategy="Absolute"):
        if actionStrategy not in self._ACTION_SETS:
            # BUG FIX: the original printed a message and returned, leaving
            # the agent half-initialized (no actions/QValues/eps/... set),
            # which surfaced later as a confusing AttributeError.  Fail fast.
            raise ValueError("Not valid action strategy: %r" % (actionStrategy,))
        self.actions = self._ACTION_SETS[actionStrategy]
        self.QValues = LinearApproximator(self.actions, targetPos)
        self.eps = eps
        self.alpha = alpha
        self.discount = discount

    def getBestActionMaxQValue(self, state):
        """Return ``(bestAction, maxQValue)`` for *state* per the approximator."""
        bestAction, maxQValue = self.QValues.getBestActionMaxQValue(state)
        return (bestAction, maxQValue)

    def selectAction(self, state):
        """Epsilon-greedy selection: with probability ``eps`` return a uniform
        random action, otherwise the greedy (max-Q) action for *state*."""
        if random.uniform(0, 1) < self.eps:
            # Explore: idiomatic random.choice replaces the original
            # randint-based indexing (same uniform distribution).
            return random.choice(self.actions)
        # Exploit: greedy action under the current approximation.
        return self.getBestActionMaxQValue(state)[0]

    def update(self, state, action, newState, reward):
        """Perform one TD update of the approximator's weights."""
        self.QValues.updateWeights(state, action, newState, reward,
                                   self.discount, self.alpha)
def __init__(self, eps, alpha, discount, targetPos, actionStrategy="Absolute"):
    """Initialize the agent: pick the discrete action set for
    *actionStrategy* ("Absolute" or "Differential"), build the
    ``LinearApproximator`` over it, and store the learning
    hyper-parameters (eps, alpha, discount).

    Raises ``ValueError`` for an unknown *actionStrategy*.  (BUG FIX:
    the original printed a message and returned, leaving the instance
    half-initialized and failing later with an AttributeError.)
    """
    if actionStrategy == "Absolute":
        # 3x3 step neighbourhood minus the zero-x column.
        self.actions = ((-1, -1), (-1, 0), (-1, 1),
                        (1, -1), (1, 0), (1, 1))
    elif actionStrategy == "Differential":
        # Full 3x3 neighbourhood including the no-op (0, 0).
        self.actions = ((-1, -1), (-1, 0), (-1, 1),
                        (0, -1), (0, 0), (0, 1),
                        (1, -1), (1, 0), (1, 1))
    else:
        raise ValueError("Not valid action strategy: %r" % (actionStrategy,))
    self.QValues = LinearApproximator(self.actions, targetPos)
    self.eps = eps
    self.alpha = alpha
    self.discount = discount