class TestTree(unittest.TestCase):
    """Unit tests for search-tree nodes built with ``create_node``."""

    def setUp(self):
        """Build the fixtures shared by every test in this class."""
        self.pomdp = Tiger()
        self.start = State(LEFT)  # tiger behind left door
        self.o = Observation()
        self.h = History()
        self.a = POMDPAction()

    def test_create_node(self):
        """A new node is a ``Node``, keeps its action and is not in the tree."""
        node = create_node(self.h, self.a, self.o)
        # assertIsInstance reports the offending type when the check fails.
        self.assertIsInstance(node, Node)
        self.assertEqual(self.a, node.a)
        self.assertEqual(len(self.h), 1)
        self.assertFalse(node.inTree)

    def test_create_children(self):
        """``create_children`` yields three children, none of them in the tree."""
        node = create_node(self.h, self.a, self.o)
        node.create_children()
        # NOTE(review): 3 is presumably the number of Tiger actions - confirm.
        self.assertEqual(3, len(node.children))
        for child in node.children.values():
            self.assertFalse(child.inTree)

    def test_is_in_tree(self):
        """``is_intree`` accepts a depth-1 history and rejects a depth-2 one."""
        # Set up a tree made of the root and its depth-1 children.
        root = create_node(self.h, self.a, self.o)
        root.inTree = True
        root.create_children()
        for child in root.children.values():
            obs, _ = child.a.do_on(self.start.clone())
            child.h.add(child.a, obs)
            child.inTree = True

        # History extended by one (action, observation) step from the root.
        h = root.h.clone()
        a = Action(listen=True)
        s = self.start.clone()
        o, _ = a.do_on(s)
        h.add(a, o)
        self.assertTrue(root.is_intree(h))

        # The same step added a second time goes beyond the prepared depth.
        h2 = h.clone()
        h2.add(a, o)
        self.assertFalse(root.is_intree(h2))

    def test_pref_actions(self):
        """A node created after listen-then-open-left starts with V=12, N=5."""
        self.h.add(self.a, self.o)
        a = Action(listen=True)
        s = self.start.clone()
        o, _ = a.do_on(s)
        self.h.add(a, o)
        a2 = Action(direction=LEFT)
        o2, _ = a2.do_on(s)
        node = create_node(self.h, a2, o2)
        # NOTE(review): 12 and 5 are presumably the preferred-action
        # initial value and visit count - confirm against create_node.
        self.assertEqual(12, node.V)
        self.assertEqual(5, node.N)
class MCPlayer(AbstractPlayer):
    """Player that picks each move by running ``search`` on its history.

    The player records every (last action, observation) pair in a
    ``History`` and hands a clone of it to ``search`` on each turn.
    """

    def __init__(self, max_iter, timeout, log=0, pref=True):
        """Store the search budget and write the options into ``params``.

        Args:
            max_iter: forwarded to ``search`` on every call to
                ``next_action``.
            timeout: stored as ``params['timeout']``.
            log: stored as ``params['log']``.
            pref: when falsy, ``params["prefs"]`` is set to ``False``;
                otherwise that entry is left untouched.
        """
        self.max_iter = max_iter

        # NOTE(review): ``params`` is not defined in this class; presumably
        # a shared settings mapping defined elsewhere - confirm.
        params['timeout'] = timeout
        params['log'] = log
        if not pref:
            params["prefs"] = False

        self.h = History()
        self.last_action = POMDPAction()
        self.first = True

    def next_action(self, state):
        """Return the cell of the action chosen by ``search`` for ``state``.

        Args:
            state: object exposing ``board`` with ``h``, ``w``, ``m`` and
                ``clone()``.

        Returns:
            The ``cell`` attribute of the selected action.
        """
        board = state.board
        is_first_call = self.first

        # Init domain knowledge while the player is still flagged as fresh.
        if is_first_call:
            self.dom_kno = Minesweeper(board.h, board.w, board.m)

        # Update history with last action - observation.
        observation = Observation(board.clone().knowledge, board.m)
        self.h.add(self.last_action, observation)

        # Launch UCT to select next best action based on current history.
        chosen = search(
            self.h.clone(),
            self.dom_kno,
            self.max_iter,
            clean=is_first_call,
        )

        if is_first_call:
            self.first = False
        self.last_action = chosen
        assert isinstance(chosen, Action)
        return chosen.cell

    def reset(self):
        """Forget the history and last action and flag the player as fresh."""
        self.h = History()
        self.last_action = POMDPAction()
        self.first = True
class TestHistory(unittest.TestCase):
    """Unit tests for ``History``: appending steps and cloning."""

    def setUp(self):
        """Create a board, a state wrapping it and an empty history."""
        self.b = Board(4, 5, 3)
        self.s = State(self.b)
        self.h = History()

    def test_add(self):
        """After two ``add`` calls the history reports the second pair."""
        first_action = Action(0, 0)
        first_obs, _ = first_action.do_on(self.s)
        self.h.add(first_action, first_obs)

        second_action = Action(2, 1)
        second_obs, _ = second_action.do_on(self.s)
        self.h.add(second_action, second_obs)

        self.assertEqual(self.h.last_action(), second_action)
        self.assertEqual(self.h.last_obs(), second_obs)

    def test_clone(self):
        """A clone compares equal to the history it was made from."""
        action = Action(1, 0)
        obs, _ = action.do_on(self.s)
        self.h.add(action, obs)

        copy_of_history = self.h.clone()
        self.assertEqual(copy_of_history, self.h)