Example #1
 def _VNode(self, agent=None, root=False, **kwargs):
     """Returns a VNode with default values; The function naming makes it clear
     that this function is about creating a VNode object."""
     if root:
         # agent cannot be None.
         return RootVNodeParticles(self._num_visits_init,
                                   self._value_init,
                                   agent.history,
                                   belief=copy.deepcopy(agent.belief))
     else:
         if agent is None:
             return VNodeParticles(self._num_visits_init,
                                   self._value_init,
                                   belief=Particles([]))
         else:
             return VNodeParticles(self._num_visits_init,
                                   self._value_init,
                                   belief=copy.deepcopy(agent.belief))
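
A minimal usage sketch, assuming a planner object exposing the _VNode method above and a pomdp_py-style agent with history and belief attributes; the names planner and agent are illustrative, not defined in the snippet:

    # Hypothetical objects: planner holds _num_visits_init / _value_init.
    root = planner._VNode(agent=agent, root=True)  # deep-copies agent.belief
    leaf = planner._VNode()                        # no agent: empty Particles belief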
Example #2
 def __init__(self, num_visits, value, history, belief=None):
     RootVNode.__init__(self, num_visits, value, history)
     # Avoid a mutable default argument: a shared Particles([]) default
     # would be reused by every node constructed without an explicit belief.
     self.belief = belief if belief is not None else Particles([])
Example #3
 def __init__(self, num_visits, value, belief=None):
     self.num_visits = num_visits
     self.value = value
     # Avoid a mutable default argument; see the note after this snippet.
     self.belief = belief if belief is not None else Particles([])
     self.children = {}  # a -> QNode
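
Both constructors above take belief=None rather than belief=Particles([]) because Python evaluates default argument values once, at definition time, so a Particles([]) default would be a single instance shared by every node. A short sketch of the pitfall (make_node is a hypothetical helper):

    from pomdp_py import Particles  # assuming pomdp_py exports Particles

    def make_node(belief=Particles([])):  # anti-pattern: one shared instance
        return belief

    a = make_node()
    b = make_node()
    a.add("s1")
    assert a is b  # "s1" now appears in b's belief as well

The None sentinel with a fresh Particles([]) in the body gives each node its own particle set.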
Example #4
    def _simulate(self,
                  state,
                  history,
                  root,
                  parent,
                  observation,
                  depth,
                  k_o=5,
                  alpha_o=1 / 15):  # root is a VNode, parent is a QNode
        if depth > self._max_depth:
            return 0

        if root is None:
            if self._agent.tree is None:
                root = self._VNode(agent=self._agent, root=True)
                self._agent.tree = root
                if self._agent.tree.history != self._agent.history:
                    raise ValueError("Unable to plan for the given history.")
            else:
                root = self._VNode()

            if parent is not None:
                parent[observation] = root

        action = self._ActionProgWiden(vnode=root, history=history)
        next_state, observation, reward, nsteps = sample_generative_model(
            self._agent, state, action)

        # Progressive widening on observations: only add a new observation
        # branch while |C(ha)| <= k_o * N(ha)^alpha_o.
        _history_action = root[action]
        new_observation = False
        if len(_history_action.children
               ) <= k_o * _history_action.num_visits**alpha_o:
            if root[action][observation] is None:
                root[action][observation] = self._VNode(
                    agent=self._agent, root=False)
                new_observation = True
        else:
            # Reuse an existing observation branch instead of widening;
            # random.choice needs a sequence, not a dict view.
            observation = random.choice(list(root[action].children))

        node = root[action][observation]
        # append s' to B(hao)
        node.belief.add(next_state)
        # append Z(o|s,a,s') to W(hao); assumes the belief is a weighted
        # particle set that supports item assignment (state -> weight).
        prob = self._agent.observation_model.probability(
            observation, next_state, action)
        node.belief[next_state] = prob

        if new_observation:
            # First visit to this observation branch: estimate its value
            # with a rollout.
            total_reward = reward + self._rollout(state, history, root, depth)
        else:
            # s' <- B(hao)[i] w.p. W(hao)[i] / sum_j W(hao)[j]
            next_state = node.belief.random()
            # r <- R(s,a,s')
            reward = self._agent.reward_model.sample(state, action, next_state)
            total_reward = reward + (
                self._discount_factor**nsteps) * self._simulate(
                    next_state, history + ((action, observation),),
                    root[action][observation], root[action], observation,
                    depth + nsteps)

        root.num_visits += 1
        root[action].num_visits += 1
        root[action].value = root[action].value + (
            total_reward - root[action].value) / (root[action].num_visits)
        return total_reward
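
A sketch of how a search loop might drive _simulate, assuming pomdp_py-style conventions; the plan wrapper and the num_sims parameter are illustrative and not taken from the snippets above:

    def plan(self, num_sims=1000):
        # Each simulation samples a starting state from the current belief
        # and descends the tree; _simulate builds the root lazily when
        # self._agent.tree is still None.
        for _ in range(num_sims):
            state = self._agent.belief.random()
            self._simulate(state, self._agent.history, self._agent.tree,
                           parent=None, observation=None, depth=0)
        # Pick the root action with the highest estimated value.
        return max(self._agent.tree.children,
                   key=lambda a: self._agent.tree[a].value)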