Example #1
0
    def backup(self, cur_visit, visits, reward):
        """Run one eligibility-trace update of ``self.theta``.

        The last entry of ``visits`` is taken as the previous state-action
        pair and ``cur_visit`` as the current one.  Both are strings of the
        form ``"dealer:player:action"`` whose fields are parsed with ``int``.

        Order of operations:
          1. add 1.0 to ``self.E`` wherever the previous pair's coarse
             feature equals 1 (accumulating trace);
          2. error = reward - Q(previous) + Q(current), with
             Q = sum(features * self.theta);
          3. scale ``self.E`` in place by ``CONST.LAMBDAS[self.l]``;
          4. add ``CONST.ALPHA * error * self.E`` to ``self.theta`` in place.
        """
        # --- previous state-action pair -------------------------------
        prev_fields = [int(token) for token in visits[-1].split(':')]
        state = State()
        state.dealer = prev_fields[0]
        state.player = prev_fields[1]
        prev_features = self.get_coarse_feature(state, prev_fields[2])

        # accumulating trace: bump every active feature of the previous pair
        active = prev_features == 1
        self.E[active] += 1.0

        q_prev = np.sum(prev_features * self.theta)
        td_error = reward - q_prev

        # --- current state-action pair (same State object is reused) ---
        cur_fields = [int(token) for token in cur_visit.split(':')]
        state.dealer = cur_fields[0]
        state.player = cur_fields[1]
        cur_features = self.get_coarse_feature(state, cur_fields[2])

        q_cur = np.sum(cur_features * self.theta)
        td_error = td_error + q_cur

        # decay the trace before it is applied to the weights
        self.E *= CONST.LAMBDAS[self.l]

        # weight update
        self.theta += CONST.ALPHA * td_error * self.E
Example #2
0
    def record_mse(self, episode):
        """Store in ``self.mse[episode]`` the error between the linear
        approximation and ``self.MCQ``.

        For every (dealer, player, action) triple in the loops below, the
        approximate value ``sum(features * self.theta)`` is compared with
        ``self.MCQ["dealer:player:action"]`` and the squared difference is
        accumulated.  The total is divided by 10.0, 21.0 and 2.0 in turn.

        NOTE(review): the dealer loop covers 1..9 while the normaliser uses
        10.0 -- confirm whether dealer value 10 is meant to be included.
        """
        total = 0.0
        for dealer in range(1, 10):
            for player in range(1, CONST.EASY21):
                for action in (CONST.HIT, CONST.STICK):
                    key = "%d:%d:%d" % (dealer, player, action)

                    # flush a tiny non-zero accumulator back to exactly zero
                    if total != 0.0 and total < 1e-8:
                        total = 0.0

                    # once the sum exceeds the cap, stop adding to it
                    if total > 10000.0:
                        continue

                    state = State()
                    state.dealer = dealer
                    state.player = player
                    features = self.get_coarse_feature(state, action)
                    approx_q = np.sum(features * self.theta)
                    total += (approx_q - self.MCQ[key]) ** 2

        # chained division kept as-is so the float result is unchanged
        self.mse[episode] = total / 10.0 / 21.0 / 2.0
Example #3
0
def play():
    """Play a single episode with a fixed threshold policy, printing as it goes.

    The dealer and player values are set from ``env.draw(initial=True)``.
    The player hits while its value is below ``CONST.STAY_VALUE`` and
    ``env.check_burst`` is false, then sticks unless ``env.check_burst``
    reports true.  The state is printed after the initial draw and after
    every hit; the final line shows the state and ``env.check_reward``.
    """
    state = State()
    env = Environment()

    state.dealer = env.draw(initial=True)
    state.player = env.draw(initial=True)
    print(state)

    def should_hit():
        # keep hitting only while under the threshold and not bust
        return (state.player < CONST.STAY_VALUE
                and not env.check_burst(state.player))

    while should_hit():
        env.step(state, CONST.HIT)
        print(state)

    busted = env.check_burst(state.player)
    if not busted:
        env.step(state, CONST.STICK)

    print(state, " Reward: ", env.check_reward(state))
Example #4
0
def checkStepDealer():
    """Estimate empirical outcome frequencies of ``Environment.step``.

    Each key of ``CONST.STEPRECORDPLAYS`` is split on ``'-'`` into
    ``dealer-player-action``; an action field of 0 selects ``CONST.HIT``,
    anything else ``CONST.STICK``.  Starting from that dealer/player pair,
    the environment is stepped ``CONST.SAMPLES`` times and the outcomes are
    tallied under ``"dealer:player:reward"`` keys (terminal states are
    collapsed to ``"0:0:reward"``).  The counts are turned into relative
    frequencies formatted as ``"%.3f"`` strings and passed to ``write``
    together with ``CONST.OUTPUT_PATH`` + the formatted
    ``CONST.CHECKSTEPTEMPLATE`` file name.
    """
    s = State()
    e = Environment()

    # dict.fromkeys keeps first-seen order and drops duplicate keys, which is
    # all the former per-key array dictionary was used for.
    for rd in dict.fromkeys(CONST.STEPRECORDPLAYS):
        index = rd.split('-')
        dealer_start = int(index[0])
        player_start = int(index[1])
        # compare by value: identity checks against int literals are unreliable
        a = CONST.HIT if int(index[2]) == 0 else CONST.STICK

        frequency = {}
        # draw exactly CONST.SAMPLES samples so that the normalisation below
        # yields frequencies that sum to one
        for _ in range(CONST.SAMPLES):
            # reset the shared state to the start position for every sample
            s.dealer = dealer_start
            s.player = player_start
            r = e.step(s, a)

            if e.is_terminal(s):
                key = "%d:%d:%d" % (0, 0, r)
            else:
                key = "%d:%d:%d" % (s.dealer, s.player, r)

            frequency[key] = frequency.get(key, 0) + 1

        # replace raw counts by formatted relative frequencies
        for entry in frequency:
            frequency[entry] = "%.3f" % (float(frequency[entry]) /
                                         CONST.SAMPLES)

        filename = CONST.CHECKSTEPTEMPLATE.format(index[0], index[1],
                                                  index[2])

        write(CONST.OUTPUT_PATH + filename, frequency)