Ejemplo n.º 1
0
class MCPlayer(AbstractPlayer):
    """Player that picks each move by running ``search`` over its history.

    The player records every (action, observation) pair in a ``History``
    and hands a clone of that history to ``search`` whenever a move is
    requested.
    """

    def __init__(self, max_iter, timeout, log=0, pref=True):
        """Store the search budget and push settings into ``params``.

        ``params`` is not defined in this block; it is presumably a
        module-level settings mapping shared with ``search`` -- confirm.

        Args:
            max_iter: Forwarded to ``search`` on every ``next_action`` call.
            timeout: Written to ``params['timeout']``.
            log: Written to ``params['log']``.
            pref: When falsy, ``params["prefs"]`` is set to ``False``;
                otherwise that entry is left untouched.
        """
        self.max_iter = max_iter
        params['timeout'] = timeout
        params['log'] = log
        if not pref:
            params["prefs"] = False
        # Per-game state; ``reset`` restores exactly these three values.
        self.h = History()
        self.last_action = POMDPAction()
        self.first = True

    def next_action(self, state):
        """Record the current observation and return the next cell to play.

        Args:
            state: Object exposing ``board``; the board's ``h``, ``w``, ``m``
                attributes and its ``clone().knowledge`` are read here.

        Returns:
            The ``cell`` attribute of the action returned by ``search``.
        """
        board = state.board
        is_first_call = self.first

        # The domain knowledge is (re)built on the first call after
        # construction or ``reset``.
        if is_first_call:
            self.dom_kno = Minesweeper(board.h, board.w, board.m)

        # Pair the previously chosen action with what the board shows now.
        observation = Observation(board.clone().knowledge, board.m)
        self.h.add(self.last_action, observation)

        # ``search`` works on a clone of the history; ``clean`` is true only
        # on the first call.
        chosen = search(self.h.clone(),
                        self.dom_kno,
                        self.max_iter,
                        clean=is_first_call)

        if is_first_call:
            self.first = False
        self.last_action = chosen
        assert isinstance(chosen, Action)
        return chosen.cell

    def reset(self):
        """Discard the history and last action, and re-arm the first-call flag."""
        self.h = History()
        self.last_action = POMDPAction()
        self.first = True
Ejemplo n.º 2
0
class TestHistory(unittest.TestCase):
    """Unit tests for ``History.add``, its last-entry accessors and ``clone``."""

    def setUp(self):
        """Build a fresh board, a state wrapping it, and an empty history."""
        self.b = Board(4, 5, 3)
        self.s = State(self.b)
        self.h = History()

    def test_add(self):
        """After two additions the history reports the second pair as last."""
        first_action = Action(0, 0)
        first_obs, _ = first_action.do_on(self.s)
        self.h.add(first_action, first_obs)

        second_action = Action(2, 1)
        second_obs, _ = second_action.do_on(self.s)
        self.h.add(second_action, second_obs)

        self.assertEqual(self.h.last_action(), second_action)
        self.assertEqual(self.h.last_obs(), second_obs)

    def test_clone(self):
        """A cloned history compares equal to the history it came from."""
        action = Action(1, 0)
        observation, _ = action.do_on(self.s)
        self.h.add(action, observation)

        self.assertEqual(self.h.clone(), self.h)