def getAction(self):
    # get best action for every state observation
    # overlay all action values for every state observation, pick best
    from numpy import asarray, where  # assumed to live at module level in the original file
    from random import choice

    LoggingAgent.getAction(self)

    # for each color, get best action, then pick the highest-value action
    # among those actions
    actions = []
    values = []
    # TODO: why are the same values printed many times in a row here?
    #print '========== in agent =========='
    #print 'states:', [[i] for i in self.lastobs.flatten()]
    for state in self.lastobs:
        #print 'state:', state
        actions.append(self.module.activate(state))
        values.append(self.module.lastMaxActionValue)
        #self.module.printState(state)
        #print '  best:', actions[-1], 'value:', values[-1]

    # pick one of the observations whose best action value ties for the maximum
    actionIdx = where(asarray(values) == max(values))[0]
    ch = choice(actionIdx)
    self.lastaction = actions[ch]
    self.bestState = self.lastobs[ch]
    #print 'assigning reward to state', self.bestState
    #print 'chosen action:', self.lastaction, 'value:', max(values)

    # add a chance to pick a random other action
    if self.learning:
        self.lastaction = self.learner.explore(self.lastobs, self.lastaction)
        #print 'after explorer:', self.lastaction

    #print '============= end ============'
    return self.lastaction
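# The where()/choice() pair above breaks ties at random between observations
# whose best action value equals the maximum. A minimal numpy illustration of
# that tie-breaking (the values are made up):
from numpy import asarray, where
from random import choice

values = [0.2, 0.9, 0.9, 0.5]
tied = where(asarray(values) == max(values))[0]   # -> array([1, 2])
picked = choice(tied)                             # index 1 or 2, chosen at random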
def getAction(self): """ Activate the module with the last observation, add the exploration from the explorer object and store the result as last action. """ LoggingAgent.getAction(self) self.lastaction = self.module.activate(self.lastobs) if self.learning: self.lastaction = self.learner.explore(self.lastobs, self.lastaction) return self.lastaction
def getAction(self): """ Activate the module with the last observation, add the exploration from the explorer object and store the result as last action. """ LoggingAgent.getAction(self) # Here is where the table or neural network returns the action # This consists of the values of the different actions # We choose the action with highet value from numpy import argmax, size tempAction = self.module.activate(self.lastobs) if (tempAction.size > 1): bestAction = argmax(tempAction) self.lastaction = [bestAction] else: # Original Code (used still for stuff like table lookup) self.lastaction = self.module.activate(self.lastobs) if self.learning: self.lastaction = self.learner.explore(self.lastobs, self.lastaction) return self.lastaction
def getAction(self):
    # get best action for every state observation
    # overlay all action values for every state observation, pick best
    from numpy import asarray, where  # assumed to live at module level in the original file
    from random import choice

    LoggingAgent.getAction(self)

    # for each board location and color, get the best action, then pick the
    # highest-value action among those actions
    actions = []
    values = []
    num_colors = len(self.lastobs[0])
    # TODO: why are the same values printed many times in a row here?
    #print '========== in agent =========='
    #print 'states:', self.lastobs
    for board_loc in self.lastobs:
        for color_state in board_loc:
            #print 'state:', color_state
            actions.append(self.module.activate(color_state))
            values.append(self.module.lastMaxActionValue)
            #self.module.printState(color_state)
            #print '  best:', actions[-1], 'value:', values[-1]

            # add a chance to pick a random other action
            if self.learning:
                actions[-1] = self.learner.explore(color_state, actions[-1])

    # pick one of the (location, color) states whose best action value ties for the maximum
    actionIdx = where(asarray(values) == max(values))[0]
    ch = choice(actionIdx)
    self.lastaction = [actions[ch], ch]
    loc, color = divmod(ch, num_colors)
    self.bestState = self.lastobs[loc][color]
    #print 'assigning reward to state', self.bestState
    #print 'chosen action:', self.lastaction, 'value:', max(values)
    #print '============= end ============'
    return self.lastaction
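# The flat index `ch` above runs over board locations x colors, so divmod()
# recovers the (location, color) pair the winning state came from. A quick
# check with illustrative numbers (num_colors = 4 is made up):
num_colors = 4
loc, color = divmod(10, num_colors)   # flat index 10 -> location 2, color 2
assert (loc, color) == (2, 2)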
def getAction(self): """This is basically the Actor part""" LoggingAgent.getAction(self) self.lastaction = self.policy.activate(self.lastobs) return self.lastaction