def __call__(self, sensation, reward=None):
        """
        Take one agent step and return the next primitive action.

        sensation -- the current sensation; tested with rl.is_terminal().
        reward    -- the reward passed in for this step.  When it is None
                     the option stack is rebuilt from scratch with
                     self.root_option as its only entry.

        Returns None when the sensation is terminal, otherwise whatever
        self.haction() resolves the top option's choice to.
        """
        # Identity test rather than ==: a reward object that overrides
        # __eq__ must not be able to trigger (or break) the reset.
        if reward is None:
            self.stack = Stack([])
            self.push_option(self.root_option)

        # haction() reads these back when it has to step other options.
        self.last_sensation = sensation
        self.last_reward = reward

        if rl.is_terminal(sensation):
            # unwind the stack giving everyone the current reward
            # TODO: when options get their own separate rewards, this may change
            while not self.stack.empty():
                option, reward_list = self.stack.pop()
                option(sensation, reward_list + [option.reward(sensation, reward)])
            return None

        # Every option below the top one only accumulates this step's reward.
        for option, rewards in self.stack[:-1]:
            rewards.append(option.reward(sensation, reward))
        # The top option is stepped with its single reward for this step.
        option, _ = self.stack.top()
        return self.haction(option(sensation, option.reward(sensation, reward)))
    def __call__(self, sensation, reward=None):
        """
        Take one agent step and return the next primitive action.

        sensation -- the current sensation; tested with rl.is_terminal().
        reward    -- the reward passed in for this step.  When it is None
                     the option stack is rebuilt from scratch with
                     self.root_option as its only entry.

        Returns None when the sensation is terminal, otherwise whatever
        self.haction() resolves the top option's choice to.
        """
        # Identity test rather than ==: a reward object that overrides
        # __eq__ must not be able to trigger (or break) the reset.
        if reward is None:
            self.stack = Stack([])
            self.push_option(self.root_option)

        # haction() reads these back when it has to step other options.
        self.last_sensation = sensation
        self.last_reward = reward

        if rl.is_terminal(sensation):
            # unwind the stack giving everyone the current reward
            # TODO: when options get their own separate rewards, this may change
            while not self.stack.empty():
                option, reward_list = self.stack.pop()
                option(sensation, reward_list + [option.reward(sensation, reward)])
            return None

        # Every option below the top one only accumulates this step's reward.
        for option, rewards in self.stack[:-1]:
            rewards.append(option.reward(sensation, reward))
        # The top option is stepped with its single reward for this step.
        option, _ = self.stack.top()
        return self.haction(option(sensation, option.reward(sensation, reward)))
 def __init__(self, **args):
     """
     Pass all keyword arguments on to the parent initializer, then
     give the instance an empty option stack.
     """
     # NOTE(review): this def is indented by one space and is not inside
     # a class body here, yet it names HierarchicalAgent in super() --
     # it looks like a stray copy of that class's __init__; confirm.
     super(HierarchicalAgent, self).__init__(**args)
     self.stack = Stack([])
class HierarchicalAgent(rl.Agent):
    """
    An agent that can use Options for hierarchical behavior.
    Internally it keeps a stack of running options, and calls
    the top option in the stack, pushing or popping
    from the stack as needed.
    """

    # The option placed at the bottom of the stack whenever it is rebuilt.
    root_option = Parameter(default=None)

    def __init__(self, **args):
        """Forward keyword arguments to the parent class and start with an empty stack."""
        super(HierarchicalAgent, self).__init__(**args)
        self.stack = Stack([])

    def __call__(self, sensation, reward=None):
        """
        Take one agent step and return the next primitive action.

        sensation -- the current sensation; tested with rl.is_terminal().
        reward    -- the reward passed in for this step.  When it is None
                     the option stack is rebuilt from scratch with
                     self.root_option as its only entry.

        Returns None when the sensation is terminal, otherwise whatever
        self.haction() resolves the top option's choice to.
        """
        # Identity test rather than ==: a reward object that overrides
        # __eq__ must not be able to trigger (or break) the reset.
        if reward is None:
            self.stack = Stack([])
            self.push_option(self.root_option)

        # haction() reads the sensation back when it steps other options.
        self.last_sensation = sensation
        self.last_reward = reward

        if rl.is_terminal(sensation):
            # unwind the stack giving everyone the current reward
            # TODO: when options get their own separate rewards, this may change
            while not self.stack.empty():
                option, reward_list = self.stack.pop()
                option(sensation, reward_list + [option.reward(sensation, reward)])
            return None

        # Every option below the top one only accumulates this step's reward.
        for option, rewards in self.stack[:-1]:
            rewards.append(option.reward(sensation, reward))
        # The top option is stepped with its single reward for this step.
        option, _ = self.stack.top()
        return self.haction(option(sensation, option.reward(sensation, reward)))

    def haction(self, action):
        """
        Resolve a (possibly hierarchical) action into a primitive one.

        action -- an Option instance, the OPTION_TERMINATED marker, or
                  anything else, which is treated as a primitive action.

        Returns the primitive action that is eventually reached.
        Raises RuntimeError if removing a terminated option leaves the
        stack empty.
        """
        self.debug("Doing haction: " + repr(action))
        sensation = self.last_sensation
        if isinstance(action, Option):
            # The action is an option, so push it on the
            # stack and start it running.
            self.debug("pushing " + repr(action))
            self.push_option(action)
            result = self.haction(action(sensation))
        elif action == OPTION_TERMINATED:
            # The top option on the stack terminated, so pop it
            # and do a step on the option below.
            self.debug("popping " + repr(self.stack.top()))
            self.stack.pop()
            if len(self.stack) == 0:
                # A real exception class: string exceptions cannot be raised.
                raise RuntimeError("Error, hierarchical stack is empty.")
            option, rewards = self.stack.pop()
            # Re-push the option so it gets a fresh, empty reward list;
            # the rewards collected so far are handed to it below.
            self.push_option(option)
            result = self.haction(option(sensation, rewards))
        else:
            # It's not an option or OPTION_TERMINATED
            # so it's a primitive. Just return it.
            self.debug("Doing primitive: " + repr(action))
            result = action

        return result

    def push_option(self, option):
        """Push `option` onto the stack paired with a new, empty reward list."""
        # The stack is a list of pairs, an option, and a list of rewards
        # that occurred since the option's last step.
        self.stack.append((option, []))
 def __init__(self,**args):
     """
     Pass all keyword arguments on to the parent initializer, then
     give the instance an empty option stack.
     """
     # NOTE(review): this def is indented by one space and is not inside
     # a class body here, yet it names HierarchicalAgent in super() --
     # it looks like a stray copy of that class's __init__; confirm.
     super(HierarchicalAgent,self).__init__(**args)
     self.stack = Stack([])
class HierarchicalAgent(rl.Agent):
    """
    An agent that can use Options for hierarchical behavior.
    Internally it keeps a stack of running options, and calls
    the top option in the stack, pushing or popping
    from the stack as needed.
    """

    # The option placed at the bottom of the stack whenever it is rebuilt.
    root_option = Parameter(default=None)

    def __init__(self, **args):
        """Forward keyword arguments to the parent class and start with an empty stack."""
        super(HierarchicalAgent, self).__init__(**args)
        self.stack = Stack([])

    def __call__(self, sensation, reward=None):
        """
        Take one agent step and return the next primitive action.

        sensation -- the current sensation; tested with rl.is_terminal().
        reward    -- the reward passed in for this step.  When it is None
                     the option stack is rebuilt from scratch with
                     self.root_option as its only entry.

        Returns None when the sensation is terminal, otherwise whatever
        self.haction() resolves the top option's choice to.
        """
        # Identity test rather than ==: a reward object that overrides
        # __eq__ must not be able to trigger (or break) the reset.
        if reward is None:
            self.stack = Stack([])
            self.push_option(self.root_option)

        # haction() reads the sensation back when it steps other options.
        self.last_sensation = sensation
        self.last_reward = reward

        if rl.is_terminal(sensation):
            # unwind the stack giving everyone the current reward
            # TODO: when options get their own separate rewards, this may change
            while not self.stack.empty():
                option, reward_list = self.stack.pop()
                option(sensation, reward_list + [option.reward(sensation, reward)])
            return None

        # Every option below the top one only accumulates this step's reward.
        for option, rewards in self.stack[:-1]:
            rewards.append(option.reward(sensation, reward))
        # The top option is stepped with its single reward for this step.
        option, _ = self.stack.top()
        return self.haction(option(sensation, option.reward(sensation, reward)))

    def haction(self, action):
        """
        Resolve a (possibly hierarchical) action into a primitive one.

        action -- an Option instance, the OPTION_TERMINATED marker, or
                  anything else, which is treated as a primitive action.

        Returns the primitive action that is eventually reached.
        Raises RuntimeError if removing a terminated option leaves the
        stack empty.
        """
        self.debug("Doing haction: " + repr(action))
        sensation = self.last_sensation
        if isinstance(action, Option):
            # The action is an option, so push it on the
            # stack and start it running.
            self.debug("pushing " + repr(action))
            self.push_option(action)
            result = self.haction(action(sensation))
        elif action == OPTION_TERMINATED:
            # The top option on the stack terminated, so pop it
            # and do a step on the option below.
            self.debug("popping " + repr(self.stack.top()))
            self.stack.pop()
            if len(self.stack) == 0:
                # A real exception class: string exceptions cannot be raised.
                raise RuntimeError("Error, hierarchical stack is empty.")
            option, rewards = self.stack.pop()
            # Re-push the option so it gets a fresh, empty reward list;
            # the rewards collected so far are handed to it below.
            self.push_option(option)
            result = self.haction(option(sensation, rewards))
        else:
            # It's not an option or OPTION_TERMINATED
            # so it's a primitive. Just return it.
            self.debug("Doing primitive: " + repr(action))
            result = action

        return result

    def push_option(self, option):
        """Push `option` onto the stack paired with a new, empty reward list."""
        # The stack is a list of pairs, an option, and a list of rewards
        # that occurred since the option's last step.
        self.stack.append((option, []))