Example #1
    def getAction(self):
        # get best action for every state observation
        # overlay all action values for every state observation, pick best
        LoggingAgent.getAction(self)

        # for each color, get best action, then pick highest-value action
        # among those actions
        actions = []
        values = []
        # TODO: why are same values printed many times in a row here?
        #print '========== in agent =========='
        #print 'states:', [[i] for i in self.lastobs.flatten()]
        for state in self.lastobs:
            #print 'state:', state
            actions.append(self.module.activate(state))
            values.append(self.module.lastMaxActionValue)
            #self.module.printState(state)
            #print ' best:', actions[-1], 'value:', values[-1]
        from numpy import array, where
        from random import choice

        # values is a plain Python list, so wrap it in an array first;
        # comparing a bare list against a scalar would not produce the
        # boolean mask that where() needs
        actionIdx = where(array(values) == max(values))[0]
        # break ties between equally valued actions at random
        ch = choice(actionIdx)
        self.lastaction = actions[ch]
        self.bestState = self.lastobs[ch]

        #print 'assigning reward to state', self.bestState
        #print 'chosen action:', self.lastaction, 'value:', max(values)
        # add a chance to pick a random other action
        if self.learning:
            self.lastaction = self.learner.explore(self.lastobs, self.lastaction)

        #print 'after explorer:', self.lastaction
        #print '============= end ============'
        return self.lastaction
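For context, the selection idiom above (activate the module once per sub-observation, collect lastMaxActionValue, then argmax with random tie-breaking) can be exercised on its own. Below is a self-contained toy run; StubModule is a hypothetical stand-in for the real ActionValueNetwork-style module, invented here only so the snippet runs.

    from numpy import array, where
    from random import choice

    class StubModule(object):
        """Stand-in: a state's best action value is its first entry."""
        def activate(self, state):
            self.lastMaxActionValue = state[0]
            return [int(state[0] > 0)]

    module = StubModule()
    actions, values = [], []
    for state in [[-1.0], [2.0], [2.0]]:
        actions.append(module.activate(state))
        values.append(module.lastMaxActionValue)
    ties = where(array(values) == max(values))[0]  # states 1 and 2 tie
    print(actions[choice(ties)])                   # prints [1]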
Example #2
    def getAction(self):
        """ Activate the module with the last observation, add the exploration from
            the explorer object and store the result as last action. """
        LoggingAgent.getAction(self)

        self.lastaction = self.module.activate(self.lastobs)

        if self.learning:
            self.lastaction = self.learner.explore(self.lastobs, self.lastaction)

        return self.lastaction
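The learner.explore(obs, action) call is where exploration noise enters; in PyBrain it delegates to an explorer object such as an epsilon-greedy one. A minimal sketch of that contract, assuming discrete actions (epsilon and num_actions are illustrative parameters, not taken from the examples):

    from random import random, randint

    def explore(state, action, epsilon=0.1, num_actions=4):
        # with probability epsilon, override the greedy action with a
        # uniformly random one; otherwise pass the action through
        if random() < epsilon:
            return [randint(0, num_actions - 1)]
        return action

    print(explore([0.0], [2]))  # usually [2], sometimes a random action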
Example #3
    def getAction(self):
        """ Activate the module with the last observation, add the exploration from
            the explorer object and store the result as last action. """
        LoggingAgent.getAction(self)

        # Here is where the table or neural network returns the action.
        # For a network this is a vector of values, one per action, and
        # we choose the action with the highest value.
        from numpy import argmax

        tempAction = self.module.activate(self.lastobs)
        if tempAction.size > 1:
            bestAction = argmax(tempAction)
            self.lastaction = [bestAction]
        else:
            # original code path (still used for things like table
            # lookup), where activate() already returns a single action
            self.lastaction = tempAction

        if self.learning:
            self.lastaction = self.learner.explore(self.lastobs, self.lastaction)

        return self.lastaction
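The tempAction.size check above separates two module contracts: a network that returns one value per action (argmax picks the best) versus a table lookup that already returns a single chosen action. A toy run of both branches, with arrays standing in for the module output:

    from numpy import argmax, array

    for tempAction in (array([0.1, 0.9, 0.4]),  # network: value per action
                       array([2.0])):           # table: action already chosen
        if tempAction.size > 1:
            lastaction = [argmax(tempAction)]
        else:
            lastaction = tempAction
        print(lastaction)  # [1], then the one-element array itself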
Example #4
    def getAction(self):
        # get best action for every state observation
        # overlay all action values for every state observation, pick best
        LoggingAgent.getAction(self)

        # for each color, get best action, then pick highest-value action
        # among those actions
        actions = []
        values = []
        num_colors = len(self.lastobs[0])
        # TODO: why are same values printed many times in a row here? episodes
        #print '========== in agent =========='
        #print 'states:', self.lastobs
        for board_loc in self.lastobs:
            for color_state in board_loc:
                #print 'state:', color_state
                actions.append(self.module.activate(color_state))
                values.append(self.module.lastMaxActionValue)
                #self.module.printState(state)
                #print ' best:', actions[-1], 'value:', values[-1]

                # add a chance to pick a random other action
                if self.learning:
                    actions[-1] = self.learner.explore(color_state,
                                                       actions[-1])

        from numpy import array, where
        from random import choice

        # values is a plain Python list, so wrap it in an array before
        # the elementwise comparison
        actionIdx = where(array(values) == max(values))[0]
        # break ties between equally valued actions at random
        ch = choice(actionIdx)
        self.lastaction = [actions[ch], ch]
        loc, color = divmod(ch, num_colors)
        self.bestState = self.lastobs[loc][color]

        #print 'assigning reward to state', self.bestState
        #print 'chosen action:', self.lastaction, 'value:', max(values)

        #print '============= end ============'
        return self.lastaction
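The chosen flat index ch runs over the (board location, color) pairs in the same row-major order as the nested loop, which is what lets divmod recover the coordinates. A quick check of that mapping, with an illustrative num_colors:

    num_colors = 3
    for ch in range(6):
        loc, color = divmod(ch, num_colors)
        # flat index 0..5 maps back to (0,0) (0,1) (0,2) (1,0) (1,1) (1,2)
        print((ch, loc, color))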
Example #5
    def getAction(self):
        """This is basically the Actor part."""
        LoggingAgent.getAction(self)
        self.lastaction = self.policy.activate(self.lastobs)
        return self.lastaction
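Here the policy maps the observation straight to an action, with no value comparison or separate exploration step; that is the actor half of an actor-critic setup. A minimal deterministic stand-in for such a policy module (the weights are illustrative, not learned):

    from numpy import array, dot, tanh

    class LinearPolicy(object):
        """Fixed linear map from observation to a bounded action."""
        def __init__(self, weights):
            self.weights = array(weights)
        def activate(self, obs):
            return tanh(dot(self.weights, array(obs)))

    policy = LinearPolicy([[0.5, -0.2]])
    print(policy.activate([1.0, 0.3]))  # one-element action vector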