Example #1
    def setUp(self):
        self.pomdp = Tiger()
        self.start = State(LEFT)  # tiger behind left door
        o = Observation()
        h = History()
        a = POMDPAction()
        self.root = create_node(h, a, o)
        self.root.inTree = True

        params['c'] = 2
        # setup a tree with root and its depth-1 children
        self.root.create_children()
        # expand listen-node
        a = Action(listen=True)
        s = self.start.clone()
        o, r = a.do_on(s)
        self.listen_child = self.root.children[a]
        self.listen_child.h.add(a, o)
        self.listen_child.inTree = True
        self.listen_child.create_children()
        # There are 3 depth-2 nodes; their histories are:
        # empty-listen-listen (ell), empty-listen-left (elf), empty-listen-right (elr)
        # elr should have the highest V (preferred action)
        for act, child in self.listen_child.children.items():
            obs, r = child.a.do_on(s.clone())
            self.listen_child.children[act] = create_node(child.h, act, obs)
            self.listen_child.children[act].inTree = True
Example #2
    def test_pref_actions(self):
        self.h.add(self.a, self.o)
        a = Action(listen=True)
        s = self.start.clone()
        o, r = a.do_on(s)
        self.h.add(a, o)
        a2 = Action(direction=LEFT)
        o2, r = a2.do_on(s)
        node = create_node(self.h, a2, o2)
        self.assertEqual(12, node.V)
        self.assertEqual(5, node.N)
Example #3
    def test_simulate_expansion(self):
        root = create_node(History(), POMDPAction(), Observation())
        # self.root1 = root
        params.update({
            'start_time': time.time(),
            'gamma': 0.5,
            'epsilon': 0.2,
            'max_depth': 100,
            'timeout': 3
        })
        simulate(self.start, root)
        self.assertEqual(len(root.children), 3)
        self.assertEqual(root.N, 1)
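The params set up in this test feed the end_rollout check that simulate uses to stop descending. Below is a minimal, hypothetical sketch of such a termination test (end_rollout_sketch is not part of the project), assuming the keys shown above and a gamma**depth < epsilon discount cutoff; the real end_rollout, which also receives the history, may differ.

import time

def end_rollout_sketch(depth, history, params):
    # Hypothetical stand-in for end_rollout(depth, history); the history argument
    # is unused here, but the real check may inspect it.
    if params['gamma'] ** depth < params['epsilon']:              # further rewards are negligible
        return True
    if depth >= params['max_depth']:                              # hard depth cap
        return True
    if time.time() - params['start_time'] > params['timeout']:    # wall-clock budget exhausted
        return True
    return False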
Example #4
    def test_is_in_tree(self):
        # setup a tree with root and its depth 1 children
        root = create_node(self.h, self.a, self.o)
        root.inTree = True
        root.create_children()
        for act, child in root.children.items():
            obs, r = child.a.do_on(self.start.clone())
            child.h.add(child.a, obs)
            child.inTree = True

        h = root.h.clone()
        a = Action(listen=True)
        s = self.start.clone()
        o, r = a.do_on(s)
        h.add(a, o)
        self.assertTrue(root.is_intree(h))
        h2 = h.clone()
        h2.add(a, o)
        self.assertFalse(root.is_intree(h2))
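Example #4 exercises is_intree, which reports whether the tree already contains a node for a given history. A hypothetical sketch of such a membership test follows (is_intree_sketch is not the project's method); it assumes History supports equality comparison and simply walks the expanded children, whereas the real implementation may index histories differently.

def is_intree_sketch(node, h):
    # Hypothetical: return True if some expanded node in this subtree has history h.
    if not node.inTree:
        return False
    if node.h == h:
        return True
    return any(is_intree_sketch(child, h) for child in node.children.values())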
Example #5
def simulate(state, node, proc=None):
    """
    Iterative implementation of an MCTS simulation step, adapted to partial observability. This function builds 
    a whole PO-MCTS starting from the root node, alternating between the following phases.

    Expansion: if the termination criterion is not met, new nodes are created from  currently available actions.

    Selection: select the best node among the children evaluated with their UCB1 value.

    Simulation: simulate a playout starting from the selected node 

    Backpropagation: update statistics about the playouts (in rollout) up to the root.

    Args:
        state (POMDPState): state sampled either from the initial state distribution or from the belief space
        node (Node): current root of the tree containing the current history
        proc (DecisionProcess): domain specific knowledge about the pomdp
    
    """
    assert isinstance(node, Node)
    depth = 0
    rewards = []
    root = node
    fringe = [(node, depth)]  # descending down the tree
    backprop = []  # climbing up the tree
    s = state.clone()
    max_d = 0
    while fringe:
        nod, d = fringe.pop()

        if end_rollout(d, nod.h):
            rewards.append(0)
            backprop.append((nod, d, s.clone()))
            continue

        max_d = max(max_d, d)

        if not root.is_intree(nod.h):
            # Expansion
            nod.create_children()
            nod.inTree = True
            backprop.append((nod, d, s.clone()))
            rewards.append(rollout(s, nod, d))
            continue
        backprop.append((nod, d, s.clone()))

        # Selection
        a, u = UCB1_action_selection(nod)

        # Simulation
        o, r = a.do_on(s)
        hao = nod.h.clone()
        rewards.append(float(r))
        if not nod.children[a].inTree:
            nod.children[a] = create_node(hao, a, o)
        fringe.append((nod.children[a], d + 1))

    # Backpropagation
    for i in range(1, len(backprop) + 1):
        nod, d, s = backprop[-i]  # parent
        nod_a = backprop[-i + 1][0]  # simulated child
        R = discount_calc(rewards[d::], params['gamma'])[0]
        # only add s to the belief space if its observation matches the real observation
        sc = state.clone()
        o = nod.h.last_obs()
        for k in range(1, d):
            act = nod.h.actions[k]
            o, tmp = act.do_on(sc)
        if nod.h.last_obs() == o:
            nod.B.append(s)

        nod_a.N += 1
        nod_a.V += (R - nod_a.V) / nod_a.N
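simulate delegates child selection to UCB1_action_selection and return computation to discount_calc, neither of which is shown here. The hypothetical sketches below illustrate one common way to implement them, assuming each child carries V (mean value) and N (visit count) and that discount_calc returns a list whose first element is the discounted return from that step onward; the project's actual helpers may differ.

import math

def ucb1_selection_sketch(node, c=2.0):
    # Pick the child action maximizing V + c * sqrt(ln(N_parent) / N_child).
    best_a, best_u = None, float('-inf')
    for a, child in node.children.items():
        if child.N == 0:
            return a, float('inf')  # explore unvisited children first
        u = child.V + c * math.sqrt(math.log(node.N) / child.N)
        if u > best_u:
            best_a, best_u = a, u
    return best_a, best_u

def discount_calc_sketch(rewards, gamma):
    # returns[i] = rewards[i] + gamma * rewards[i+1] + gamma**2 * rewards[i+2] + ...
    returns = [0.0] * len(rewards)
    acc = 0.0
    for i in reversed(range(len(rewards))):
        acc = rewards[i] + gamma * acc
        returns[i] = acc
    return returns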
Example #6
    def test_create_children(self):
        node = create_node(self.h, self.a, self.o)
        node.create_children()
        self.assertEqual(3, len(node.children))
        for act, child in node.children.items():
            self.assertFalse(child.inTree)
Example #7
    def test_create_node(self):
        node = create_node(self.h, self.a, self.o)
        self.assertTrue(isinstance(node, Node))
        self.assertEqual(self.a, node.a)
        self.assertEqual(len(self.h), 1)
        self.assertFalse(node.inTree)
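Taken together, the examples rely on a small Node interface. The hypothetical sketch below (NodeSketch, not the project's class) only collects the attributes used above; the real Node also seeds V and N from preferred-action knowledge, as Example #2 shows.

class NodeSketch:
    # Hypothetical minimal interface matching the attributes used in the examples.
    def __init__(self, h, a, o):
        self.h = h            # History leading to this node
        self.a = a            # action that produced the node
        self.o = o            # observation received after taking a
        self.V = 0.0          # estimated value of the node
        self.N = 0            # visit count
        self.B = []           # particle set approximating the belief state
        self.children = {}    # maps Action -> child Node
        self.inTree = False   # True once the node has been expanded

    def create_children(self):
        # One child per legal action of the POMDP; left abstract in this sketch.
        raise NotImplementedError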