Python reward Examples

Programming Language: Python

Namespace/Package Name: rules

Method/Function: reward

Examples at hotexamples.com: 3

Python reward - 3 examples found. These are the top-rated real-world Python examples of rules.reward extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: test.py Project: WeiChengLiou/connect4

    def testStateAction(self):
        """Drive two players through scripted move sequences and check scores.

        The test runs in three stages:

        1. Register one state with each player via ``check`` and verify the
           sign stored for the first player's state.
        2. Replay the scripted game ``'0101010'`` three times via ``subtest``.
        3. Replay ``'01010101'`` move by move; once more than five states have
           been collected, the 'X' player is asked for its ``best`` action,
           which must differ from the scripted move.  When a state reports a
           win, both players are updated with the terminal state and each must
           hold a non-zero action score for an earlier state.
        """
        ps = self.init()
        s1 = getState('01')
        s2 = getState('10')
        ps[0].check(s1)
        ps[1].check(s2)
        assert ps[0].states[str(s1)].sgn == 'O'

        for _ in range(3):
            self.subtest(ps, '0101010')

        p = ps[0]
        s1 = getState('0101')
        # NOTE(review): reward() receives the state object here but ``s.win``
        # in the loop below -- confirm which argument reward() expects.
        p.update(s1, 0, reward(s1))
        s2 = getState('010101')
        p.update(s2, 0, reward(s2))
        resetAll(ps)

        assert p.states[str(s1)].actions[0].score != 0

        # Test for best case
        # Build a real list: the sequence is sliced below, which an iterator
        # (what map() returns on Python 3) would not support.
        poss = [int(ch) for ch in '01010101']
        s0 = C4State(initState, None, 'O')
        ss = [s0]
        i = 0
        for j, pos in enumerate(poss):
            if (len(ss) > 5) and (ps[i].sgn == 'X'):
                act = ps[i].best(s0)
                assert act.name is not None
                assert pos != act.name, 'Best case not work'
                # NOTE(review): the overridden move is only passed to
                # update(); getState() below still uses the scripted prefix.
                pos = int(act.name)
            s = getState(poss[:j + 1])
            ss.append(s)
            score = reward(s.win)
            ps[i].update(s0, pos, score)

            if s.win:
                # Player who just moved: ss[-2] is the state it moved from.
                ps[i].update(s, None, score)
                sX = ps[i].states[str(ss[-2])]
                assert any(a.score != 0 for a in sX.actions)

                # Other player: ss[-3] is the state one move earlier.
                i = (i + 1) % 2
                ps[i].update(s, None, score)
                sX = ps[i].states[str(ss[-3])]
                assert any(a.score != 0 for a in sX.actions)
                break

            i = (i + 1) % 2
            s0 = s

Example #2

Show file

File: test.py Project: WeiChengLiou/connect4

    def subtest(self, ps, actions):
        """Replay a scripted game and check that a win leaves non-zero scores.

        Args:
            ps: Pair of player objects, indexed 0 and 1; player 0 moves first
                and the players alternate every move.
            actions: Iterable of digit characters (e.g. ``'0101010'``); each
                is converted with ``int`` and used as one move.

        The players are reset with ``resetAll`` both before and after the
        replay.  When a state reports a win, both players are updated with
        the terminal state and each must hold at least one non-zero action
        score for an earlier state; the replay then stops.
        """
        resetAll(ps)

        # Build a real list: the sequence is sliced below, which an iterator
        # (what map() returns on Python 3) would not support.
        poss = [int(ch) for ch in actions]
        s0 = C4State(initState, None, 'O')
        ss = [s0]
        i = 0
        for j, pos in enumerate(poss):
            s = getState(poss[:j + 1])
            ss.append(s)
            score = reward(s.win)
            ps[i].update(s0, pos, score)

            if s.win:
                # Player who just moved: ss[-2] is the state it moved from.
                ps[i].update(s, None, score)
                sX = ps[i].states[str(ss[-2])]
                assert any(a.score != 0 for a in sX.actions)

                # Other player: ss[-3] is the state one move earlier.
                i = (i + 1) % 2
                ps[i].update(s, None, score)
                sX = ps[i].states[str(ss[-3])]
                assert any(a.score != 0 for a in sX.actions)
                break

            i = (i + 1) % 2
            s0 = s

        resetAll(ps)

Example #3

Show file

File: test.py Project: WeiChengLiou/connect4

    def testStateActionX(self):
        """Play many games per algorithm and require 'X' to win most of them.

        For each algorithm name in ``['SARSA', 'Q']`` a player pair is built
        with ``self.init(['stupid', algo])`` and 1000 games are played.  In
        each game the player whose ``sgn`` is 'X' picks moves with ``best``
        and the other player with ``predict``.  The test asserts that 'X' won
        more than 750 games, then clears ``C4Model`` before the next
        algorithm.
        """
        algos = ['SARSA', 'Q']
        n_games = 1000
        max_moves = 1000      # upper bound on moves attempted per game
        report_every = 1000   # print progress every this many games
        min_wins = 750        # 'X' must win strictly more games than this

        for algo in algos:
            print(algo)
            ps = self.init(['stupid', algo])

            cnt = 0
            for k in range(n_games):
                s0 = C4State(initState, None, 'O')
                i = 0
                poss = []
                for j in range(max_moves):
                    p = ps[i]
                    if p.sgn == 'X':
                        act = p.best(s0)
                    else:
                        act = p.predict(s0)
                    pos = int(act.name)
                    poss.append(pos)
                    s = getState(poss[:j + 1])
                    score = reward(s.win)
                    p.update(s0, pos, score)

                    if s.win:
                        # Both players are updated with the terminal state.
                        for p1 in ps:
                            p1.update(s, None, score)
                        break

                    s0 = s
                    i = (i + 1) % 2

                resetAll(ps)
                # NOTE(review): ``s`` is the last state reached; if a game can
                # end without a winner this relies on s.win not being 'X'.
                if s.win == 'X':
                    cnt += 1
                if (k + 1) % report_every == 0:
                    print('%s %s' % (k + 1, cnt))
            assert cnt > min_wins

            C4Model.clear()