Example #1
 def _VNode(self, agent=None, root=False, **kwargs):
     """Returns a VNode with default values; The function naming makes it clear
     that this function is about creating a VNode object."""
     if root:
         # agent cannot be None.
         return RootVNodeParticles(self._num_visits_init,
                                   self._value_init,
                                   agent.history,
                                   belief=copy.deepcopy(agent.belief))
     else:
         if agent is None:
             return VNodeParticles(self._num_visits_init,
                                   self._value_init,
                                   belief=Particles([]))
         else:
             return VNodeParticles(self._num_visits_init,
                                   self._value_init,
                                   belief=copy.deepcopy(agent.belief))
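
A minimal usage sketch, assuming a planner object exposing the _VNode method above and a pomdp_py-style agent with history and belief attributes; the names planner and agent are illustrative, not defined in the snippet:

    # Hypothetical objects: planner holds _num_visits_init / _value_init.
    root = planner._VNode(agent=agent, root=True)  # deep-copies agent.belief
    leaf = planner._VNode()                        # no agent: empty Particles belief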
Example #2
 def __init__(self, num_visits, value, history, belief=None):
     RootVNode.__init__(self, num_visits, value, history)
     # Avoid a mutable default argument: a shared Particles([]) default
     # would be reused by every node constructed without an explicit belief.
     self.belief = belief if belief is not None else Particles([])
Example #3
 def __init__(self, num_visits, value, belief=None):
     self.num_visits = num_visits
     self.value = value
     # Avoid a mutable default argument; see the note after this snippet.
     self.belief = belief if belief is not None else Particles([])
     self.children = {}  # a -> QNode
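
Both constructors above take belief=None rather than belief=Particles([]) because Python evaluates default argument values once, at definition time, so a Particles([]) default would be a single instance shared by every node. A short sketch of the pitfall (make_node is a hypothetical helper):

    from pomdp_py import Particles  # assuming pomdp_py exports Particles

    def make_node(belief=Particles([])):  # anti-pattern: one shared instance
        return belief

    a = make_node()
    b = make_node()
    a.add("s1")
    assert a is b  # "s1" now appears in b's belief as well

The None sentinel with a fresh Particles([]) in the body gives each node its own particle set.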
Example #4
    def _simulate(self,
                  state,
                  history,
                  root,
                  parent,
                  observation,
                  depth,
                  k_o=5,
                  alpha_o=1 / 15):  # root is a VNode, parent is a QNode
        if depth > self._max_depth:
            return 0

        if root is None:
            if self._agent.tree is None:
                root = self._VNode(agent=self._agent, root=True)
                self._agent.tree = root
                if self._agent.tree.history != self._agent.history:
                    raise ValueError("Unable to plan for the given history.")
            else:
                root = self._VNode()

            if parent is not None:
                parent[observation] = root

        action = self._ActionProgWiden(vnode=root, history=history)
        next_state, observation, reward, nsteps = sample_generative_model(
            self._agent, state, action)

        # Progressive widening on observations: only add a new observation
        # branch while |C(ha)| <= k_o * N(ha)^alpha_o.
        _history_action = root[action]
        new_observation = False
        if len(_history_action.children
               ) <= k_o * _history_action.num_visits**alpha_o:
            if root[action][observation] is None:
                root[action][observation] = self._VNode(
                    agent=self._agent, root=False)
                new_observation = True
        else:
            # Reuse an existing observation branch instead of widening;
            # random.choice needs a sequence, not a dict view.
            observation = random.choice(list(root[action].children))

        node = root[action][observation]
        # append s' to B(hao)
        node.belief.add(next_state)
        # append Z(o|s,a,s') to W(hao); assumes the belief is a weighted
        # particle set that supports item assignment (state -> weight).
        prob = self._agent.observation_model.probability(
            observation, next_state, action)
        node.belief[next_state] = prob

        if new_observation:
            # First visit to this observation branch: estimate its value
            # with a rollout.
            total_reward = reward + self._rollout(state, history, root, depth)
        else:
            # s' <- B(hao)[i] w.p. W(hao)[i] / sum_j W(hao)[j]
            next_state = node.belief.random()
            # r <- R(s,a,s')
            reward = self._agent.reward_model.sample(state, action, next_state)
            total_reward = reward + (
                self._discount_factor**nsteps) * self._simulate(
                    next_state, history + ((action, observation),),
                    root[action][observation], root[action], observation,
                    depth + nsteps)

        root.num_visits += 1
        root[action].num_visits += 1
        root[action].value = root[action].value + (
            total_reward - root[action].value) / (root[action].num_visits)
        return total_reward
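
A sketch of how a search loop might drive _simulate, assuming pomdp_py-style conventions; the plan wrapper and the num_sims parameter are illustrative and not taken from the snippets above:

    def plan(self, num_sims=1000):
        # Each simulation samples a starting state from the current belief
        # and descends the tree; _simulate builds the root lazily when
        # self._agent.tree is still None.
        for _ in range(num_sims):
            state = self._agent.belief.random()
            self._simulate(state, self._agent.history, self._agent.tree,
                           parent=None, observation=None, depth=0)
        # Pick the root action with the highest estimated value.
        return max(self._agent.tree.children,
                   key=lambda a: self._agent.tree[a].value)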