def __call__(self, sensation, reward=None):
    """
    Advance the agent one step and return the next action.

    A reward of None resets the option stack and pushes root_option
    before anything else happens.  The sensation and reward are then
    stored in last_sensation / last_reward.

    If rl.is_terminal(sensation) holds, every option is popped and called
    with the sensation and its accumulated reward list extended by
    option.reward(sensation, reward); None is returned.  Otherwise
    option.reward(sensation, reward) is appended to the reward list of
    every option below the top of the stack, the top option is called
    with its own reward value, and the result is passed to self.haction.
    """
    # Identity test: `== None` dispatches to the reward object's own
    # __eq__, which is not guaranteed to return a plain bool.
    if reward is None:
        self.stack = Stack([])
        self.push_option(self.root_option)

    self.last_sensation = sensation
    self.last_reward = reward

    if rl.is_terminal(sensation):
        # unwind the stack giving everyone the current reward
        # TODO: when options get their own separate rewards, this may change
        while not self.stack.empty():
            option, reward_list = self.stack.pop()
            option(sensation, reward_list + [option.reward(sensation, reward)])
        return None

    # Options below the top are not stepped now; they only accumulate
    # rewards until control returns to them.
    for option, rewards in self.stack[:-1]:
        rewards.append(option.reward(sensation, reward))
    option, rewards = self.stack.top()
    return self.haction(option(sensation, option.reward(sensation, reward)))
def __call__(self, sensation, reward=None):
    """
    Advance the agent one step and return the next action.

    A reward of None resets the option stack and pushes root_option
    before anything else happens.  The sensation and reward are then
    stored in last_sensation / last_reward.

    If rl.is_terminal(sensation) holds, every option is popped and called
    with the sensation and its accumulated reward list extended by
    option.reward(sensation, reward); None is returned.  Otherwise
    option.reward(sensation, reward) is appended to the reward list of
    every option below the top of the stack, the top option is called
    with its own reward value, and the result is passed to self.haction.
    """
    # Identity test: `== None` dispatches to the reward object's own
    # __eq__, which is not guaranteed to return a plain bool.
    if reward is None:
        self.stack = Stack([])
        self.push_option(self.root_option)

    self.last_sensation = sensation
    self.last_reward = reward

    if rl.is_terminal(sensation):
        # unwind the stack giving everyone the current reward
        # TODO: when options get their own separate rewards, this may change
        while not self.stack.empty():
            option, reward_list = self.stack.pop()
            option(sensation, reward_list + [option.reward(sensation, reward)])
        return None

    # Options below the top are not stepped now; they only accumulate
    # rewards until control returns to them.
    for option, rewards in self.stack[:-1]:
        rewards.append(option.reward(sensation, reward))
    option, rewards = self.stack.top()
    return self.haction(option(sensation, option.reward(sensation, reward)))
def __init__(self, **args):
    """
    Forward all keyword arguments to the base-class initializer, then
    give the agent a fresh Stack built from an empty list.
    """
    super(HierarchicalAgent, self).__init__(**args)
    # No options are on the stack until one is pushed.
    fresh_stack = Stack([])
    self.stack = fresh_stack
class HierarchicalAgent(rl.Agent):
    """
    An agent that can use Options for hierarchical behavior.  Internally
    it keeps a stack of running options, and calls the top option in the
    stack, pushing or popping from the stack as needed.
    """

    root_option = Parameter(default=None)

    def __init__(self, **args):
        """Initialize the base agent and start with an empty option stack."""
        super(HierarchicalAgent, self).__init__(**args)
        self.stack = Stack([])

    def __call__(self, sensation, reward=None):
        """
        Advance the agent one step and return the next action.

        A reward of None resets the option stack and pushes root_option
        before anything else happens.  The sensation and reward are then
        stored in last_sensation / last_reward for use by haction().

        If rl.is_terminal(sensation) holds, every option is popped and
        called with the sensation and its accumulated reward list extended
        by option.reward(sensation, reward); None is returned.  Otherwise
        option.reward(sensation, reward) is appended to the reward list of
        every option below the top, the top option is called with its own
        reward value, and the result is resolved through haction().
        """
        # Identity test: `== None` dispatches to the reward object's own
        # __eq__, which is not guaranteed to return a plain bool.
        if reward is None:
            self.stack = Stack([])
            self.push_option(self.root_option)

        self.last_sensation = sensation
        self.last_reward = reward

        if rl.is_terminal(sensation):
            # unwind the stack giving everyone the current reward
            # TODO: when options get their own separate rewards, this may change
            while not self.stack.empty():
                option, reward_list = self.stack.pop()
                option(sensation, reward_list + [option.reward(sensation, reward)])
            return None

        # Options below the top are not stepped now; they only accumulate
        # rewards until control returns to them.
        for option, rewards in self.stack[:-1]:
            rewards.append(option.reward(sensation, reward))
        option, rewards = self.stack.top()
        return self.haction(option(sensation, option.reward(sensation, reward)))

    def haction(self, action):
        """
        Resolve a (possibly hierarchical) action and return the result.

        An Option instance is pushed on the stack and called with the last
        sensation; OPTION_TERMINATED pops the top option and steps the one
        beneath it with its accumulated rewards; anything else is returned
        unchanged.  The first two cases recurse on the value produced.

        Raises RuntimeError if an option terminates and nothing is left
        beneath it on the stack.
        """
        self.debug("Doing haction: " + repr(action))
        sensation = self.last_sensation
        if isinstance(action, Option):
            # The action is an option, so push it on the
            # stack and start it running.
            self.debug("pushing " + repr(action))
            self.push_option(action)
            result = self.haction(action(sensation))
        elif action == OPTION_TERMINATED:
            # The top option on the stack terminated, so pop it
            # and do a step on the option below.
            self.debug("popping " + repr(self.stack.top()))
            self.stack.pop()
            if len(self.stack) == 0:
                # A real exception class: string exceptions are rejected
                # by the interpreter and cannot be caught by callers.
                raise RuntimeError("Error, hierarchical stack is empty.")
            option, rewards = self.stack.pop()
            # Re-pushing the option resets its list of rewards.
            self.push_option(option)
            result = self.haction(option(sensation, rewards))
        else:
            # It's not an option or OPTION_TERMINATED
            # so it's a primitive.  Just return it.
            self.debug("Doing primitive: " + repr(action))
            result = action
        return result

    def push_option(self, option):
        """Push `option` on the stack paired with a new, empty reward list."""
        # The stack is a list of pairs, an option, and a list of rewards
        # that occurred since the option's last step.
        self.stack.append((option, []))
def __init__(self, **args):
    """
    Forward all keyword arguments to the base-class initializer, then
    give the agent a fresh Stack built from an empty list.
    """
    super(HierarchicalAgent, self).__init__(**args)
    # No options are on the stack until one is pushed.
    fresh_stack = Stack([])
    self.stack = fresh_stack
class HierarchicalAgent(rl.Agent):
    """
    An agent that can use Options for hierarchical behavior.  Internally
    it keeps a stack of running options, and calls the top option in the
    stack, pushing or popping from the stack as needed.
    """

    root_option = Parameter(default=None)

    def __init__(self, **args):
        """Initialize the base agent and start with an empty option stack."""
        super(HierarchicalAgent, self).__init__(**args)
        self.stack = Stack([])

    def __call__(self, sensation, reward=None):
        """
        Advance the agent one step and return the next action.

        A reward of None resets the option stack and pushes root_option
        before anything else happens.  The sensation and reward are then
        stored in last_sensation / last_reward for use by haction().

        If rl.is_terminal(sensation) holds, every option is popped and
        called with the sensation and its accumulated reward list extended
        by option.reward(sensation, reward); None is returned.  Otherwise
        option.reward(sensation, reward) is appended to the reward list of
        every option below the top, the top option is called with its own
        reward value, and the result is resolved through haction().
        """
        # Identity test: `== None` dispatches to the reward object's own
        # __eq__, which is not guaranteed to return a plain bool.
        if reward is None:
            self.stack = Stack([])
            self.push_option(self.root_option)

        self.last_sensation = sensation
        self.last_reward = reward

        if rl.is_terminal(sensation):
            # unwind the stack giving everyone the current reward
            # TODO: when options get their own separate rewards, this may change
            while not self.stack.empty():
                option, reward_list = self.stack.pop()
                option(sensation, reward_list + [option.reward(sensation, reward)])
            return None

        # Options below the top are not stepped now; they only accumulate
        # rewards until control returns to them.
        for option, rewards in self.stack[:-1]:
            rewards.append(option.reward(sensation, reward))
        option, rewards = self.stack.top()
        return self.haction(option(sensation, option.reward(sensation, reward)))

    def haction(self, action):
        """
        Resolve a (possibly hierarchical) action and return the result.

        An Option instance is pushed on the stack and called with the last
        sensation; OPTION_TERMINATED pops the top option and steps the one
        beneath it with its accumulated rewards; anything else is returned
        unchanged.  The first two cases recurse on the value produced.

        Raises RuntimeError if an option terminates and nothing is left
        beneath it on the stack.
        """
        self.debug("Doing haction: " + repr(action))
        sensation = self.last_sensation
        if isinstance(action, Option):
            # The action is an option, so push it on the
            # stack and start it running.
            self.debug("pushing " + repr(action))
            self.push_option(action)
            result = self.haction(action(sensation))
        elif action == OPTION_TERMINATED:
            # The top option on the stack terminated, so pop it
            # and do a step on the option below.
            self.debug("popping " + repr(self.stack.top()))
            self.stack.pop()
            if len(self.stack) == 0:
                # A real exception class: string exceptions are rejected
                # by the interpreter and cannot be caught by callers.
                raise RuntimeError("Error, hierarchical stack is empty.")
            option, rewards = self.stack.pop()
            # Re-pushing the option resets its list of rewards.
            self.push_option(option)
            result = self.haction(option(sensation, rewards))
        else:
            # It's not an option or OPTION_TERMINATED
            # so it's a primitive.  Just return it.
            self.debug("Doing primitive: " + repr(action))
            result = action
        return result

    def push_option(self, option):
        """Push `option` on the stack paired with a new, empty reward list."""
        # The stack is a list of pairs, an option, and a list of rewards
        # that occurred since the option's last step.
        self.stack.append((option, []))