Example #1
    def run(self):
        """
        Run Sarsa(lambda) for self.T episodes, appending an
        (episode, Q) snapshot to self.learning_curve after each one.
        """
        t = 0

        while t < self.T:
            # Reset eligibility traces at the start of every episode.
            self.e_trace.clear()
            s = generate_initial_state()
            a = self._choose_action(s)

            while True:
                # Decay all traces; for an undiscounted task (gamma = 1)
                # the decay factor is just lambda. Since e_trace was
                # cleared above, it must default to zero for unseen
                # indices (e.g. a defaultdict(float)).
                for i in range(len(_feature_space)):
                    self.e_trace[i] *= self.lambda_

                # Accumulate traces for the binary features active in (s, a).
                f_a = _phi(s, a)
                for i in f_a:
                    self.e_trace[i] += 1

                # Take one step and start the TD error with
                # delta = r - Q(s, a); the Q(s1, a1) term is added
                # below only if the episode continues.
                s1, r = step(s, a)
                delta = r - self.q(s, a)

                if is_episode_terminated(r, a):
                    # Terminal transition: the target is just r.
                    self._update_theta(delta)
                    break

                s = s1
                a = self._choose_action(s)
                # Complete the TD error: delta = r + Q(s1, a1) - Q(s, a).
                delta += self.q(s, a)
                self._update_theta(delta)

            self.learning_curve.append((t, self.extract_q()))
            t += 1
        return self
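
The helpers this loop leans on (self.q, self._update_theta, the e_trace container) are not shown above. The following is a minimal self-contained sketch of how they typically fit together under linear function approximation; every name and default value here is an assumption for illustration, not taken from the source:

from collections import defaultdict

def _phi(s, a):
    # Hypothetical stub: indices of the binary features active for (s, a).
    return [hash((s, a)) % 10]

class LinearSarsaParts:
    # Hypothetical container showing the pieces run() relies on.
    def __init__(self, n_features=10, alpha=0.01, lambda_=0.9):
        self.theta = [0.0] * n_features    # one weight per binary feature
        self.e_trace = defaultdict(float)  # run() clears and indexes this
        self.alpha = alpha
        self.lambda_ = lambda_

    def q(self, s, a):
        # Linear value estimate: Q(s, a) = sum of weights of active features.
        return sum(self.theta[i] for i in _phi(s, a))

    def _update_theta(self, delta):
        # Gradient step: theta_i += alpha * delta * e_i for every trace.
        for i, e in self.e_trace.items():
            self.theta[i] += self.alpha * delta * e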
Example #2
def generate_episode(policy):
    """
    Returns a generator which can be iterated to reveal
    successive game steps. Calling yield on this generator
    will return a 5-tuple: (s, a, r, s1, a1) where:
        s - the initial state
        a - the action taken in s
        r - the reward received
        s1 - the new state after a is performed
        a1 - a new action drawn from the policy in state s1

    :param policy: the policy pi(s, a)
    :return: a game episode generator
    """
    s = generate_initial_state()
    a = draw_action(s, policy)

    while True:
        s1, r = step(s, a)
        a1 = draw_action(s1, policy)
        yield (s, a, r, s1, a1)

        # Episode ends after we stick or lose, whichever comes first
        if is_episode_terminated(r, a):
            break
        s = s1
        a = a1
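
Because the generator breaks out of its own loop once the episode terminates, a caller can drive a complete episode with a plain for loop. A minimal usage sketch, assuming a policy object compatible with draw_action from the surrounding module:

# Hypothetical usage: play one episode and accumulate the undiscounted return.
episode_return = 0
for s, a, r, s1, a1 in generate_episode(policy):
    episode_return += r
# episode_return now holds the total reward collected over the episode.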