def setUp(self):
    """Build the Tiger fixture: a root, its children, and an expanded listen branch."""
    self.pomdp = Tiger()
    self.start = State(LEFT)  # tiger behind left door

    # Root node built from an empty observation, history and action.
    empty_obs = Observation()
    empty_history = History()
    empty_action = POMDPAction()
    self.root = create_node(empty_history, empty_action, empty_obs)
    self.root.inTree = True
    params['c'] = 2

    # Give the root its depth-1 children.
    self.root.create_children()

    # Expand the child reached through the listen action.
    listen = Action(listen=True)
    state = self.start.clone()
    listen_obs, _ = listen.do_on(state)
    listen_node = self.root.children[listen]
    self.listen_child = listen_node
    listen_node.h.add(listen, listen_obs)
    listen_node.inTree = True
    listen_node.create_children()

    # There are three depth-2 nodes; their histories read:
    #   empty-listen-listen (ell), empty-listen-left (elf),
    #   empty-listen-right (elr)
    # elr should have the highest V (preferred action).
    for action, placeholder in listen_node.children.items():
        observation, _ = placeholder.a.do_on(state.clone())
        replacement = create_node(placeholder.h, action, observation)
        listen_node.children[action] = replacement
        replacement.inTree = True
def test_pref_actions(self):
    """A node created after listening and then opening LEFT starts with V=12 and N=5."""
    history = self.h
    history.add(self.a, self.o)

    # First step: listen from a copy of the start state.
    listen = Action(listen=True)
    state = self.start.clone()
    listen_obs, _ = listen.do_on(state)
    history.add(listen, listen_obs)

    # Second step: act on the LEFT door using the same (already stepped) state.
    open_left = Action(direction=LEFT)
    left_obs, _ = open_left.do_on(state)

    created = create_node(history, open_left, left_obs)
    self.assertEqual(12, created.V)
    self.assertEqual(5, created.N)
def test_simulate_expansion(self):
    """One simulate() call on a fresh root gives it three children and one visit."""
    history = History()
    action = POMDPAction()
    observation = Observation()
    root = create_node(history, action, observation)

    settings = {
        'start_time': time.time(),
        'gamma': 0.5,
        'epsilon': 0.2,
        'max_depth': 100,
        'timeout': 3,
    }
    params.update(settings)

    simulate(self.start, root)

    self.assertEqual(len(root.children), 3)
    self.assertEqual(root.N, 1)
def test_is_in_tree(self):
    """is_intree() accepts a depth-1 history and rejects a depth-2 one."""
    # Build a tree made of the root and its depth-1 children, all marked in-tree.
    tree_root = create_node(self.h, self.a, self.o)
    tree_root.inTree = True
    tree_root.create_children()
    for _action, kid in tree_root.children.items():
        kid_obs, _ = kid.a.do_on(self.start.clone())
        kid.h.add(kid.a, kid_obs)
        kid.inTree = True

    # History one listen step below the root: expected to be in the tree.
    depth_one = tree_root.h.clone()
    listen = Action(listen=True)
    state = self.start.clone()
    listen_obs, _ = listen.do_on(state)
    depth_one.add(listen, listen_obs)
    self.assertTrue(tree_root.is_intree(depth_one))

    # Same history extended by one more identical step: expected to be absent.
    depth_two = depth_one.clone()
    depth_two.add(listen, listen_obs)
    self.assertFalse(tree_root.is_intree(depth_two))
def simulate(state, node, proc=None):
    """
    Run one iterative PO-MCTS simulation pass starting at ``node``.

    The pass follows a single path down the tree and then updates the
    statistics of the nodes on that path:

    Selection: while the current node's history is already in the tree, an
        action is chosen with ``UCB1_action_selection`` and applied to the
        working state; the resulting reward is recorded.
    Expansion: the first node whose history is not in the tree gets its
        children created and is marked as in-tree.
    Simulation: a playout (``rollout``) is run from the expanded node and its
        result is recorded as that node's reward.
    Backpropagation: visit counts ``N`` and value estimates ``V`` are updated
        from the recorded rewards, and states are appended to the belief
        lists ``B`` of the visited nodes.

    Args:
        state (POMDPState): state sampled either from the initial state
            distribution or from the belief space. Only clones of it are
            stepped by this function.
        node (Node): current root of the tree containing the current history.
        proc (DecisionProcess): domain specific knowledge about the pomdp.
            Not used by this function; kept for interface compatibility.

    Returns:
        None. The tree rooted at ``node`` is updated in place.
    """
    assert isinstance(node, Node)

    root = node
    rewards = []           # one entry per visited node, so rewards[k] belongs to depth k
    fringe = [(node, 0)]   # descending down the tree (holds at most one node)
    backprop = []          # climbing up the tree: (node, depth, state snapshot)
    s = state.clone()

    while fringe:
        nod, d = fringe.pop()

        if end_rollout(d, nod.h):
            # Termination criterion met: record a zero reward and stop descending.
            rewards.append(0)
            backprop.append((nod, d, s.clone()))
            continue

        if not root.is_intree(nod.h):
            # Expansion
            nod.create_children()
            nod.inTree = True
            backprop.append((nod, d, s.clone()))
            rewards.append(rollout(s, nod, d))
            continue

        backprop.append((nod, d, s.clone()))

        # Selection (the second value returned by UCB1 is not needed here)
        a, _ = UCB1_action_selection(nod)

        # Simulation: `s` is advanced in place by the chosen action
        o, r = a.do_on(s)
        rewards.append(float(r))

        # Reuse the child when it is already part of the tree; otherwise
        # replace it with a fresh node built from a copy of this history.
        child = nod.children[a]
        if not child.inTree:
            child = create_node(nod.h.clone(), a, o)
            nod.children[a] = child
        fringe.append((child, d + 1))

    # Backpropagation
    gamma = params['gamma']
    for i in range(1, len(backprop) + 1):
        nod, d, s = backprop[-i]      # parent
        # For i == 1 the index -i + 1 is 0, so the "child" is the first node
        # that was visited (the starting node) rather than a deeper one.
        # NOTE(review): confirm this wrap-around is the intended way to
        # update the starting node's statistics.
        nod_a = backprop[-i + 1][0]   # simulated child
        R = discount_calc(rewards[d:], gamma)[0]

        # Only add s to the belief space if its observation matches the real
        # observation: replay the history's actions on a fresh copy of the
        # input state and compare the last observation obtained.
        sc = state.clone()
        o = nod.h.last_obs()
        for step in range(1, d):
            o, _ = nod.h.actions[step].do_on(sc)
        if nod.h.last_obs() == o:
            nod.B.append(s)

        # Incremental mean update of the value estimate.
        nod_a.N += 1
        nod_a.V += (R - nod_a.V) / nod_a.N
def test_create_children(self):
    """create_children() yields three children, none of them marked in-tree."""
    parent = create_node(self.h, self.a, self.o)
    parent.create_children()

    self.assertEqual(3, len(parent.children))
    for _action, kid in parent.children.items():
        self.assertFalse(kid.inTree)
def test_create_node(self):
    """create_node() returns a Node holding the given action, not yet in the tree.

    Also checks that the fixture history has length 1 after the call.
    """
    node = create_node(self.h, self.a, self.o)

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)) which only reports "False is not true".
    self.assertIsInstance(node, Node)
    self.assertEqual(self.a, node.a)
    self.assertEqual(len(self.h), 1)
    self.assertFalse(node.inTree)