def __call__(self, sensation, reward=None):
    """Advance the agent one step and return the action chosen by the top option.

    When ``reward`` is ``None`` the option stack is replaced with a fresh
    ``Stack`` and ``self.root_option`` is pushed onto it via
    ``self.push_option`` (presumably this marks the start of an episode --
    confirm against the caller).  The sensation and reward are always
    recorded in ``self.last_sensation`` and ``self.last_reward``.

    If ``rl.is_terminal(sensation)`` is true, every ``(option, reward_list)``
    entry is popped off the stack and each option is called with the
    sensation and its reward list extended by
    ``option.reward(sensation, reward)``; the method then returns ``None``.

    Otherwise, every stack entry except the last has
    ``option.reward(sensation, reward)`` appended to its reward list, the
    top option is called with the sensation and its own reward, and that
    result is passed through ``self.haction``.

    Args:
        sensation: The current observation; passed on to the options.
        reward: The reward for the previous step, or ``None``.

    Returns:
        ``None`` for a terminal sensation, otherwise whatever
        ``self.haction`` returns for the top option's output.
    """
    # Identity test: ``== None`` would invoke a custom ``__eq__`` on the
    # reward (e.g. an array type) and could mis-detect or raise.
    if reward is None:
        self.stack = Stack([])
        self.push_option(self.root_option)

    self.last_sensation = sensation
    self.last_reward = reward

    if rl.is_terminal(sensation):
        # unwind the stack giving everyone the current reward
        # TODO: when options get their own separate rewards, this may change
        while not self.stack.empty():
            option, reward_list = self.stack.pop()
            option(sensation, reward_list + [option.reward(sensation, reward)])
        return None

    # Options below the top only accumulate the reward; they are not called.
    for option, rewards in self.stack[:-1]:
        rewards.append(option.reward(sensation, reward))

    # The top option's accumulated reward list is not used on this path.
    option, _ = self.stack.top()
    return self.haction(option(sensation, option.reward(sensation, reward)))
def __init__(self, **args):
    """Set up the agent with an empty option stack.

    All keyword arguments are forwarded unchanged to the superclass
    constructor; afterwards ``self.stack`` is set to a ``Stack`` built
    from an empty list.
    """
    parent = super(HierarchicalAgent, self)
    parent.__init__(**args)

    # Created after the superclass initialiser has run.
    self.stack = Stack([])