Beispiel #1
0
class TestTree(unittest.TestCase):
    """Unit tests for search-tree nodes built with ``create_node``."""

    def setUp(self):
        """Create a fresh problem, start state and empty history per test."""
        self.pomdp = Tiger()
        self.start = State(LEFT)  # tiger behind left door
        self.o = Observation()
        self.h = History()
        self.a = POMDPAction()

    def test_create_node(self):
        """A new node keeps its action and is not yet marked as in-tree."""
        node = create_node(self.h, self.a, self.o)
        # assertIsInstance reports the actual type when the check fails.
        self.assertIsInstance(node, Node)
        self.assertEqual(self.a, node.a)
        # After create_node the history passed in must hold one entry.
        self.assertEqual(len(self.h), 1)
        self.assertFalse(node.inTree)

    def test_create_children(self):
        """create_children() yields three children, none marked in-tree."""
        node = create_node(self.h, self.a, self.o)
        node.create_children()
        self.assertEqual(3, len(node.children))
        # Only the child nodes matter here, not the keys they are stored by.
        for child in node.children.values():
            self.assertFalse(child.inTree)

    def test_is_in_tree(self):
        """is_intree() accepts a depth-1 history and rejects a depth-2 one."""
        # Set up a tree with a root and its depth-1 children.
        root = create_node(self.h, self.a, self.o)
        root.inTree = True
        root.create_children()
        for child in root.children.values():
            # Each child's action is applied to its own copy of the start
            # state, so the children do not affect one another.
            obs, _ = child.a.do_on(self.start.clone())
            child.h.add(child.a, obs)
            child.inTree = True

        # One listen step from the root history: expected inside the tree.
        h = root.h.clone()
        a = Action(listen=True)
        s = self.start.clone()
        o, _ = a.do_on(s)
        h.add(a, o)
        self.assertTrue(root.is_intree(h))

        # The same step appended a second time: expected outside the tree.
        h2 = h.clone()
        h2.add(a, o)
        self.assertFalse(root.is_intree(h2))

    def test_pref_actions(self):
        """A node created after listen-then-LEFT has V == 12 and N == 5."""
        self.h.add(self.a, self.o)
        a = Action(listen=True)
        s = self.start.clone()
        o, _ = a.do_on(s)
        self.h.add(a, o)
        # The second action is applied to the same state object as the first.
        a2 = Action(direction=LEFT)
        o2, _ = a2.do_on(s)
        node = create_node(self.h, a2, o2)
        self.assertEqual(12, node.V)
        self.assertEqual(5, node.N)
Beispiel #2
0
class MCPlayer(AbstractPlayer):
    """Player that chooses every move by running ``search`` on its history.

    The player keeps a ``History`` of (action, observation) pairs and the
    last action it returned; both are cleared again by ``reset()``.
    """

    def __init__(self, max_iter, timeout, log=0, pref=True):
        """Store the search budget and write the settings into ``params``.

        max_iter -- forwarded to ``search`` on every ``next_action`` call
        timeout  -- stored under params['timeout']
        log      -- stored under params['log']
        pref     -- when falsy, params["prefs"] is set to False; when
                    truthy, params["prefs"] is left as it is
        """
        self.max_iter = max_iter
        params['timeout'] = timeout
        params['log'] = log
        if not pref:
            params["prefs"] = False
        self.h = History()
        self.last_action = POMDPAction()
        self.first = True

    def next_action(self, state):
        """Return the cell of the action selected for ``state``.

        Appends the previous action together with the observation built
        from ``state.board`` to the history, then lets ``search`` (UCT)
        pick the next action from a copy of that history.
        """
        board = state.board
        is_first_call = self.first

        # Domain knowledge is built on the first call after construction
        # or after reset().
        if is_first_call:
            self.dom_kno = Minesweeper(board.h, board.w, board.m)

        # Extend the history with the last action and what it revealed.
        observation = Observation(board.clone().knowledge, board.m)
        self.h.add(self.last_action, observation)

        # search() receives a copy of the history; clean is True only on
        # the first call.
        chosen = search(
            self.h.clone(),
            self.dom_kno,
            self.max_iter,
            clean=is_first_call,
        )

        self.first = False
        self.last_action = chosen
        assert isinstance(chosen, Action)
        return chosen.cell

    def reset(self):
        """Drop the history and last action and re-arm the first-call flag."""
        self.h = History()
        self.last_action = POMDPAction()
        self.first = True
class TestHistory(unittest.TestCase):
    """Checks how ``History`` records and copies action/observation pairs."""

    def setUp(self):
        """Create a board, a state wrapping it, and an empty history."""
        board = Board(4, 5, 3)
        self.b = board
        self.s = State(board)
        self.h = History()

    def test_add(self):
        """After two add() calls the history reports the second pair."""
        for coords in ((0, 0), (2, 1)):
            action = Action(*coords)
            obs, _ = action.do_on(self.s)
            self.h.add(action, obs)

        # `action` and `obs` now refer to the pair added last.
        self.assertEqual(self.h.last_action(), action)
        self.assertEqual(self.h.last_obs(), obs)

    def test_clone(self):
        """A clone of a one-entry history compares equal to the original."""
        action = Action(1, 0)
        obs, _ = action.do_on(self.s)
        self.h.add(action, obs)
        self.assertEqual(self.h.clone(), self.h)