def testStateAction(self):
    """Replay scripted games and check that the players record scores.

    Steps:
      1. Register one state with each player and check the sign stored
         for the first player's state.
      2. Replay the position sequence '0101010' three times via
         ``subtest``.
      3. Apply two direct updates to the first player, reset, and check
         that action 0 of state '0101' has a non-zero score.
      4. Replay '01010101'; once more than five states have been recorded,
         ask the 'X' player for its best action, check that it differs
         from the scripted position, and record that action instead.
    """
    ps = self.init()
    s1 = getState('01')
    s2 = getState('10')
    ps[0].check(s1)
    ps[1].check(s2)
    assert ps[0].states[str(s1)].sgn == 'O'

    for _ in range(3):
        self.subtest(ps, '0101010')

    p = ps[0]
    # NOTE(review): reward() receives the state itself here, but s.win in
    # the replay loop below -- confirm which argument it expects.
    s1 = getState('0101')
    p.update(s1, 0, reward(s1))
    s2 = getState('010101')
    p.update(s2, 0, reward(s2))
    resetAll(ps)
    assert p.states[str(s1)].actions[0].score != 0

    # Test for best case
    # A real list (not map()) because it is sliced on every turn.
    poss = [int(c) for c in '01010101']
    s0 = C4State(initState, None, 'O')
    ss = [s0]
    i = 0
    for j, pos in enumerate(poss):
        if len(ss) > 5 and ps[i].sgn == 'X':
            act = ps[i].best(s0)
            assert act.name is not None
            # Compare as ints: comparing an int with a non-int name could
            # never fail, which would make the check meaningless.
            best_pos = int(act.name)
            assert pos != best_pos, 'Best case not work'
            pos = best_pos
        # NOTE(review): the successor state is still built from the
        # scripted sequence, even when pos was overridden above.
        s = getState(poss[:j + 1])
        ss.append(s)
        score = reward(s.win)
        ps[i].update(s0, pos, score)
        if s.win:
            # Final update for the player that just moved; the state it
            # moved from (ss[-2]) must hold a non-zero action score.
            ps[i].update(s, None, score)
            sX = ps[i].states[str(ss[-2])]
            assert any(a.score != 0 for a in sX.actions)
            # Same for the other player, checked one state further back.
            i = (i + 1) % 2
            ps[i].update(s, None, score)
            sX = ps[i].states[str(ss[-3])]
            assert any(a.score != 0 for a in sX.actions)
            break
        i = (i + 1) % 2
        s0 = s
def subtest(self, ps, actions):
    """Replay one scripted game and check that both players scored it.

    Args:
        ps: Players indexed 0 and 1; they alternate turns, starting with
            ``ps[0]``.
        actions: Iterable of digit characters; each one is converted with
            ``int`` and used as the position played on that turn.

    The players are reset before and after the replay. When a state
    reports a win, both players receive a final update and each must hold
    at least one non-zero action score for an earlier state; the replay
    stops there.
    """
    resetAll(ps)
    # A real list (not map()) because it is sliced on every turn.
    poss = [int(c) for c in actions]
    s0 = C4State(initState, None, 'O')
    ss = [s0]
    i = 0
    for j, pos in enumerate(poss):
        s = getState(poss[:j + 1])
        ss.append(s)
        score = reward(s.win)
        ps[i].update(s0, pos, score)
        if s.win:
            # Final update for the player that just moved; the state it
            # moved from (ss[-2]) must hold a non-zero action score.
            ps[i].update(s, None, score)
            sX = ps[i].states[str(ss[-2])]
            assert any(a.score != 0 for a in sX.actions)
            # Same for the other player, checked one state further back.
            i = (i + 1) % 2
            ps[i].update(s, None, score)
            sX = ps[i].states[str(ss[-3])]
            assert any(a.score != 0 for a in sX.actions)
            break
        i = (i + 1) % 2
        s0 = s
    resetAll(ps)
def testStateActionX(self):
    """Play many games per algorithm and require 'X' to win most of them.

    For each of 'SARSA' and 'Q', players are built with
    ``self.init(['stupid', algo])`` and 1000 games are played. On its
    turn the player whose sign is 'X' uses ``best``; the other player
    uses ``predict``. Games whose final state reports ``win == 'X'`` are
    counted, and the count must exceed 750.
    """
    n_games = 1000
    max_moves = 1000
    report_every = 1000
    min_wins = 750

    for algo in ['SARSA', 'Q']:
        print(algo)
        ps = self.init(['stupid', algo])
        cnt = 0
        for k in range(n_games):
            s0 = C4State(initState, None, 'O')
            i = 0
            poss = []
            for j in range(max_moves):
                p = ps[i]
                if p.sgn == 'X':
                    act = p.best(s0)
                else:
                    act = p.predict(s0)
                pos = int(act.name)
                poss.append(pos)
                # poss now holds j + 1 moves; the state is rebuilt from
                # the full move history each turn.
                s = getState(poss)
                score = reward(s.win)
                p.update(s0, pos, score)
                if s.win:
                    # Every player gets a final update on the end state.
                    for p1 in ps:
                        p1.update(s, None, score)
                    break
                s0 = s
                i = (i + 1) % 2
            resetAll(ps)
            if s.win == 'X':
                cnt += 1
            if (k + 1) % report_every == 0:
                print('%d %d' % (k + 1, cnt))
        assert cnt > min_wins
        C4Model.clear()