Beispiel #1
0
    def rSA(self, state, act, sgn):
        """Return the one-step look-ahead value of playing ``act`` in ``state``.

        The result is the immediate reward of the move plus, when
        ``init.evalS`` returns ``None`` for the resulting state, ``self.gamma``
        times the probability-weighted sum of ``self.value`` over the replies
        listed in ``self.pi`` for that state.

        Args:
            state: State before the move, in whatever form ``newstate`` and
                ``self.reward`` accept.
            act: Action applied to ``state``.
            sgn: Mark of the player making the move (``'X'`` or ``'O'``).

        Returns:
            ``reward + gamma * sum(p * value(next)[0])``, or the bare reward
            when ``init.evalS`` returns something other than ``None``.
        """
        # The reply is played by the other side.  Derive that side from the
        # ``sgn`` argument (the player who actually made ``act``) rather than
        # from ``self.sgn``; otherwise a call with ``sgn != self.sgn`` would
        # let the same player move twice in a row.
        opponent = 'X' if sgn == 'O' else 'O'
        state1 = newstate(state, act, sgn)
        total = self.reward(state, state1)

        # NOTE(review): a non-None result presumably means the game is over
        # (win/draw), so there are no replies to average over -- confirm
        # against init.evalS.
        if init.evalS(state1) is not None:
            return total

        expected = 0
        # items() works on both Python 2 and 3; the yielded value already is
        # the probability, so no second lookup in self.pi is needed.
        for act1, p1 in self.pi[state1].items():
            state2 = newstate(state1, act1, opponent)
            # Only element 0 of what self.value returns is used here.
            expected += p1 * self.value(state2)[0]
        return total + self.gamma * expected
Beispiel #2
0
    def genstate(self):
        """Fill ``self.pi`` with every state reachable from the start state.

        Starting from a 9-element all-``None`` state, every action of every
        known state is expanded repeatedly until a full pass adds no new
        entry to ``self.pi``.  For each action record the method sets:

        * ``p``   -- uniform probability ``1 / number_of_actions``
        * ``ret`` -- result of ``init.evalS`` on the successor state
        * ``r``   -- ``Reward(state, successor)``
        * ``v``   -- initialised to ``r``
        * ``nextstate`` -- the ``STATE`` object of the successor, set only
          when ``ret`` is ``None``

        After each pass the number of entries before and after the pass is
        printed as ``"<before> <after>"``.
        """
        state0 = [None] * 9
        self.pi[tuple(state0)] = STATE(state0)

        while True:
            n0 = len(self.pi)
            # Iterate over a snapshot of the keys: new states are inserted
            # into self.pi inside the loop, which would raise RuntimeError on
            # a live dict view (Python 3).
            for s in list(self.pi):
                v = self.pi[s]
                # NOTE(review): getturn presumably returns the mark of the
                # player to move in ``s`` -- confirm against its definition.
                sgn = getturn(s)
                n = len(v.Actions)

                for act, v1 in v.Actions.items():
                    state1 = init.newstate(s, act, sgn)
                    # Kept inside the loop so that an empty action set never
                    # triggers a division by zero.
                    v1.p = 1. / n
                    v1.ret = init.evalS(state1)
                    v1.r = Reward(s, state1)
                    v1.v = v1.r
                    if v1.ret is None:
                        # Successor gets its own entry, created on first sight
                        # and shared by every action that leads to it.
                        key = tuple(state1)
                        if key not in self.pi:
                            self.pi[key] = STATE(key)
                        v1.nextstate = self.pi[key]
            n1 = len(self.pi)
            # Same text as the old ``print n0, n1`` statement, but valid on
            # both Python 2 and Python 3.
            print("%d %d" % (n0, n1))
            # Fixed point reached: the last pass discovered nothing new.
            if n1 <= n0:
                break