def __init__(self, faClass=Linear, resetFA=True, ordered=False, vectorblock=False):
    """Initialize the agent and remember its estimator configuration.

    faClass: function approximator class handed to the estimator in _setup.
    resetFA: whether the function approximator should be reset (used later;
        exact semantics depend on the estimator — not visible here).
    ordered: if True, already-chosen actions are remembered so they are not
        drawn again within an episode.
    vectorblock: if True, a VectorBlockEstimator is built instead of a
        plain FAEstimator (see _setup).
    """
    Agent.__init__(self)
    # keep all configuration flags for later use in _setup
    self.faClass = faClass
    self.vectorblock = vectorblock
    self.ordered = ordered
    self.resetFA = resetFA
def _setup(self, conditions):
    """Build the Q-table once the experiment conditions are known.

    Requires both discrete states and discrete actions; raises
    AgentException otherwise.
    """
    Agent._setup(self, conditions)
    both_discrete = (self.conditions['discreteStates']
                     and self.conditions['discreteActions'])
    if not both_discrete:
        raise AgentException('QAgent expects discrete states and actions. Use adapter or a different environment.')
    # table indexed by (state, action)
    self.estimator = TableEstimator(self.conditions['stateNum'],
                                    self.conditions['actionNum'])
def _setup(self, conditions):
    """Create the function-approximation estimator for FQI.

    Requires continuous states and discrete actions; raises AgentException
    otherwise. Builds a VectorBlockEstimator or a FAEstimator depending on
    the `vectorblock` flag stored in __init__.
    """
    Agent._setup(self, conditions)
    # Fix: the original compared `== False`; use direct truth testing
    # (PEP 8). Rewritten via De Morgan: reject if states are discrete
    # or actions are not.
    if self.conditions['discreteStates'] or not self.conditions['discreteActions']:
        raise AgentException('FQIAgent expects continuous states and discrete actions. Use adapter or a different environment.')
    if self.vectorblock:
        self.estimator = VectorBlockEstimator(self.conditions['stateDim'],
                                              self.conditions['actionNum'],
                                              faClass=self.faClass,
                                              ordered=self.ordered)
    else:
        self.estimator = FAEstimator(self.conditions['stateDim'],
                                     self.conditions['actionNum'],
                                     faClass=self.faClass,
                                     ordered=self.ordered)
def _setup(self, conditions):
    """Create the BAS estimator for continuous states and actions.

    Requires both continuous states and continuous actions; raises
    AgentException otherwise. The estimator input is the concatenated
    state+action vector, with 2**actionDim discrete outputs (binary
    action-search decisions). Also replaces the history so it can store
    bas-extended experiences.
    """
    Agent._setup(self, conditions)
    # Fix: the original compared `== False` twice; use direct truth
    # testing (PEP 8). Reject if either space is discrete.
    if self.conditions['discreteStates'] or self.conditions['discreteActions']:
        raise AgentException('BASAgent expects continuous states and actions. Use adapter or a different environment.')
    self.estimator = FAEstimator(self.conditions['stateDim'] + self.conditions['actionDim'],
                                 2**self.conditions['actionDim'], self.faClass)
    # change history to store bas-extended experiences
    # (consistency fix: the original mixed bare `conditions[...]` with
    # `self.conditions[...]` on this line; use self.conditions throughout,
    # which Agent._setup populates from `conditions`)
    self.history = History(self.conditions['stateDim'] + self.conditions['actionDim'], 1)
def __init__(self, faClass=Linear):
    """Initialize the agent with the given function-approximator class."""
    Agent.__init__(self)
    # action range bounds
    self.amin, self.amax = -1., 1.
    # resolution (number of refinement steps; semantics defined elsewhere)
    self.nres = 3
    # store (decision, action) tuples for one action in the list
    self.decisions = []
    # estimator class, used when the estimator is built later
    self.faClass = faClass
def giveReward(self, reward):
    """Forward the reward to the base class.

    In ordered mode, first record the chosen action with the estimator so
    it is not drawn again (see newEpisode, which clears this memory).
    """
    if self.ordered:
        # remember the action before the base class processes the reward
        self.estimator.rememberAction(self.action)
    Agent.giveReward(self, reward)
def newEpisode(self):
    """Start a new episode; in ordered mode, clear the action memory."""
    Agent.newEpisode(self)
    if self.ordered:
        # forget which actions were already taken in the previous episode
        self.estimator.resetMemory()