Example #1
File: fqi.py Project: rueckstiess/dopamine
 def _setup(self, conditions):
     """ if agent is discrete in states and actions create Q-Table. """
     Agent._setup(self, conditions)
     if self.conditions['discreteStates'] or not self.conditions['discreteActions']:
         raise AgentException('FQIAgent expects continuous states and discrete actions. Use adapter or a different environment.')
     
     if self.vectorblock:
         self.estimator = VectorBlockEstimator(self.conditions['stateDim'], self.conditions['actionNum'], faClass=self.faClass, ordered=self.ordered)
     else:
         self.estimator = FAEstimator(self.conditions['stateDim'], self.conditions['actionNum'], faClass=self.faClass, ordered=self.ordered)
Example #2
File: bas.py Project: rueckstiess/dopamine
 def _setup(self, conditions):
     """ if agent is discrete in states and actions create Q-Table. """
     Agent._setup(self, conditions)
     if self.conditions['discreteStates'] or self.conditions['discreteActions']:
         raise AgentException('BASAgent expects continuous states and actions. Use adapter or a different environment.')
         
     self.estimator = FAEstimator(self.conditions['stateDim'] + self.conditions['actionDim'], 2**self.conditions['actionDim'], self.faClass)
     
     # change history to store bas-extended experiences
     self.history = History(self.conditions['stateDim'] + self.conditions['actionDim'], 1)
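In this _setup, the estimator's input is the state concatenated with the current, partially refined action, and its discrete output indexes one of 2**actionDim joint up/down decisions. A tiny worked example of that dimension arithmetic, with hypothetical stateDim = 4 and actionDim = 2:

stateDim, actionDim = 4, 2                    # hypothetical dimensions
input_dim = stateDim + actionDim              # 6: state plus partially refined action
num_decisions = 2**actionDim                  # 4: one up/down bit per action dimension
print(input_dim, num_decisions)               # prints: 6 4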
Example #3
File: fqi.py Project: rueckstiess/dopamine
class FQIAgent(Agent):
    
    alpha = 1.0
    gamma = 0.9
    iterations = 1
    presentations = 1
    
    def __init__(self, faClass=Linear, resetFA=True, ordered=False, vectorblock=False):
        """ initialize the agent with the estimatorClass. """
        Agent.__init__(self)
        self.faClass = faClass
        self.resetFA = resetFA
        self.ordered = ordered
        self.vectorblock = vectorblock

    
    def _setup(self, conditions):
        """ if agent is discrete in states and actions create Q-Table. """
        Agent._setup(self, conditions)
        if self.conditions['discreteStates'] or not self.conditions['discreteActions']:
            raise AgentException('FQIAgent expects continuous states and discrete actions. Use adapter or a different environment.')
        
        if self.vectorblock:
            self.estimator = VectorBlockEstimator(self.conditions['stateDim'], self.conditions['actionNum'], faClass=self.faClass, ordered=self.ordered)
        else:
            self.estimator = FAEstimator(self.conditions['stateDim'], self.conditions['actionNum'], faClass=self.faClass, ordered=self.ordered)
    

    def _calculate(self):
        self.action = self.estimator.getBestAction(self.state)
    
    
    def newEpisode(self):
        """ reset the memory. """
        Agent.newEpisode(self)

        if self.ordered:
            self.estimator.resetMemory()
    

    def giveReward(self, reward):
        """ additionally remember the chosen action to not draw it again. """
        if self.ordered:
            self.estimator.rememberAction(self.action)
        
        Agent.giveReward(self, reward)    

    
    def buildMemoryFromEpisode(self, episode):
        """ builds the memory for already executed actions from the current
            episode. this is necessary if the episode was appended without
            actually being executed via the experiments interact() method. 
        """
        if self.ordered:
            for a in episode.actions:
                self.estimator.rememberAction(a)
                
    
    def learn(self):
        """ go through whole episode and make Q-value updates. """  

        for i in range(self.iterations):
            dataset = []
            
            for episode in self.history:
                if self.ordered:
                    self.estimator.resetMemory()

                for state, action, reward, nextstate in episode:                    
                    qvalue = self.estimator.getValue(state, action)
                    if self.ordered:
                        self.estimator.rememberAction(action)
                    if nextstate is not None:
                        bestnext = self.estimator.getValue(nextstate, self.estimator.getBestAction(nextstate))
                    else:
                        bestnext = 0.
                    target = (1-self.alpha) * qvalue + self.alpha * (reward + self.gamma * bestnext)

                    dataset.append([state, action, target])

            if len(dataset) == 0:
                continue
                
            # ground targets to 0 to avoid drifting values
            mintarget = min(map(itemgetter(2), dataset))
            
            # reset estimator (not resetting might lead to faster convergence!)
            if self.resetFA:
                self.estimator.reset()
            for _ in range(self.presentations):
                shuffle(dataset)
                for state, action, target in dataset:
                    self.estimator.updateValue(state, action, target-mintarget)
            self.estimator.train()
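The target computed in learn() above is a soft Bellman backup: with the default alpha = 1.0 it reduces to the standard fitted Q-iteration target reward + gamma * max_a' Q(s', a'). A minimal standalone sketch of that computation, using hypothetical numbers and no dopamine dependencies:

def fqi_target(qvalue, reward, bestnext, alpha=1.0, gamma=0.9):
    # soft Bellman backup as in FQIAgent.learn(); with alpha=1.0 this is
    # simply reward + gamma * bestnext
    return (1 - alpha) * qvalue + alpha * (reward + gamma * bestnext)

# purely illustrative values
print(fqi_target(qvalue=0.5, reward=1.0, bestnext=2.0))  # 1.0 + 0.9 * 2.0 = 2.8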
Example #4
File: bas.py Project: rueckstiess/dopamine
class BASAgent(Agent):
    
    alpha = 1.0
    gamma = 0.9
    
    def __init__(self, faClass=Linear):
        """ initialize the agent with the estimatorClass. """
        Agent.__init__(self)
        
        self.amin = -1.
        self.amax = 1.
        self.nres = 3
        
        # store (decision,action) tuples for one action in the list
        self.decisions = []
        
        self.faClass = faClass
    
    def _setup(self, conditions):
        """ if agent is discrete in states and actions create Q-Table. """
        Agent._setup(self, conditions)
        if self.conditions['discreteStates'] or self.conditions['discreteActions']:
            raise AgentException('BASAgent expects continuous states and actions. Use adapter or a different environment.')
            
        self.estimator = FAEstimator(self.conditions['stateDim'] + self.conditions['actionDim'], 2**self.conditions['actionDim'], self.faClass)
        
        # change history to store bas-extended experiences
        self.history = History(self.conditions['stateDim'] + self.conditions['actionDim'], 1)
    
    
    def giveReward(self, reward):
        """ override function to store the internal actions in the history. """
        if self.progressCnt == 2:
            self.reward = reward
            self.progressCnt = 0
            if self.loggingEnabled:
                # go through internal decisions and transform them to states, actions, rewards
                olda = array([(self.amax + self.amin) / 2.]*self.conditions['actionDim'])
                for i, (d,a) in enumerate(self.decisions):
                    state = r_[self.state, olda]
                    action = d
                    
                    if i < self.nres-1:
                        reward = 0.
                    else:
                        reward = self.reward
                    
                    self.history.append(state, action, reward)
                    olda = a                   
                    

        else:
            raise AgentException('reward was given before action was returned.')
    
    def _internalDecisions(self, state):
        """ takes a state and queries the estimator several times as a binary search.
            generates (binary) decision and action at each timestep. """
        
        self.decisions = []
        
        a = array([(self.amax + self.amin) / 2.]*self.conditions['actionDim'])
        delta = (self.amax - self.amin) * float(2**(self.nres-1)) / (2**self.nres -1)
        for i in range(self.nres):
            delta = delta/2.
            decision = self.estimator.getBestAction(r_[self.state, a])
            
            # internal epsilon-greedy exploration
            if random.random() < 0.1:
                decision = array([random.randint(2**self.conditions['actionDim'])])

            # turn into binary list
            blist = -1.*ones(self.conditions['actionDim'])
            for j, bit in enumerate(reversed(bin(int(decision))[2:])):
                if bit == '1':
                    blist[-j-1] = 1.
            
            # update action
            a = a + delta*blist
            self.decisions.append((decision, a))
            
        return a
                
    def _calculate(self):
        """ Return the action with the maximal value for the given state. """
        self.action = self._internalDecisions(self.state)


    def learn(self):
        """ go through whole episode and make Q-value updates. """  
        for i in range(1):  # single pass over the stored history
            
            self.estimator.reset()

            for episode in self.history:
                for state, action, reward, nextstate in episode:
                    # # don't consider last state
                    # if equal(state, nextstate).all():
                    #     break

                    qvalue = self.estimator.getValue(state, action)
                    bestnext = self.estimator.getValue(nextstate, self.estimator.getBestAction(nextstate))
                    target = (1-self.alpha) * qvalue + self.alpha * (reward + self.gamma * bestnext)

                    self.estimator.updateValue(state, action, target)

            self.estimator.train()
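_internalDecisions above implements a binary search over each continuous action dimension: the step size delta is halved at every resolution level, and each dimension moves up or down by delta according to one bit of the discrete decision. A standalone sketch of that refinement for a single dimension, substituting a fixed, hypothetical decision sequence for the estimator:

amin, amax, nres = -1.0, 1.0, 3
a = (amax + amin) / 2.0                       # start at the midpoint of the action range
delta = (amax - amin) * float(2**(nres - 1)) / (2**nres - 1)
for bit in [1.0, -1.0, 1.0]:                  # stand-in for the estimator's binary decisions
    delta = delta / 2.0                       # halve the step at each resolution level
    a = a + delta * bit                       # move up (+1) or down (-1) by the current step
print(a)                                      # refined action after nres decisions (~0.43 here)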
Example #5
 def __init__(self, stateDim, actionNum, faClass=Linear, ordered=False):
     self.fa = None
     FAEstimator.__init__(self, stateDim, actionNum, faClass, ordered)