Beispiel #1
0
def run():
    """Train for 40 episodes with eligibility-trace Q-learning and log the results.

    Each step performs the current action via ``do_action``, picks the next
    action with ``policy`` and the greedy action with ``max_Q``, and computes
    the TD error ``r1 + discount * q_best - Q[s1][a1]``.  The trace of the
    visited pair is set to 1, ``inc_Q`` is applied to every (state, action)
    pair, and all traces are then either decayed by ``discount * e_decay``
    (when the chosen next action is the greedy one) or reset to 0.
    NOTE(review): this matches the shape of Watkins's Q(lambda) with
    replacing traces -- confirm against ``inc_Q``/``policy``, which are
    defined elsewhere.

    After every episode the game is restarted, the traces are cleared with
    ``reset_E``, a record is appended to the module-level ``log`` and the
    module-level ``alpha`` and ``epsilon`` are rescheduled.  When all
    episodes are done, ``log`` is written to ``data/Q.csv`` and ``'Logged'``
    is printed.
    """
    import os  # local import: only needed to create the output directory

    # Only names that are rebound below need a ``global`` declaration;
    # ``discount`` is merely read and ``log`` is mutated in place.
    global alpha
    global epsilon

    time.sleep(1)
    a1, _ = policy(World.player)
    for episode_num in range(40):
        steps = 0
        score = 0
        while not World.has_restarted():
            # Do the action. The returned tuple rebinds s1 and a1, so the
            # state does not have to be carried over between iterations.
            s1, a1, r1, s2 = do_action(a1)
            score += r1

            # Next action actually taken vs. the greedy action in s2.
            a2, _ = policy(s2)
            a_best, q_best = max_Q(s2)
            delta = r1 + discount * q_best - Q[s1][a1]
            E[s1][a1] = 1

            # Both values are constant over the sweep below, so compute
            # them once instead of once per (state, action) pair.
            next_is_greedy = a_best == a2
            trace_decay = discount * e_decay

            for state in states:
                for action in actions:
                    inc_Q(state, action, alpha, delta)
                    if next_is_greedy:
                        E[state][action] *= trace_decay
                    else:
                        # The next action is not the greedy one: drop all
                        # traces.
                        E[state][action] = 0

            a1 = a2
            steps += 1

            # Insert a short time.sleep(...) here if the game is going too
            # fast to follow.

        World.restart_game()
        reset_E()
        log.append({
            'episode': episode_num,
            'score': score,
            'steps': steps,
            'alpha': alpha,
            # NOTE(review): a constant 0 is logged here rather than the
            # current ``epsilon`` -- confirm whether that is intended.
            'epsilon': 0
        })

        # Schedules for the next episode: alpha never drops below 0.1,
        # epsilon never rises above 0.3.
        alpha = max(0.1, pow(episode_num + 1, -0.4))
        epsilon = min(0.3, pow(episode_num + 1, -1.2))

    # Make sure the output directory exists so a missing folder cannot
    # throw away the log after all episodes have been played.
    os.makedirs('data', exist_ok=True)
    with open('data/Q.csv', 'w', newline='') as csvfile:
        writer = csv.DictWriter(
            csvfile,
            fieldnames=['episode', 'score', 'steps', 'alpha', 'epsilon'])
        writer.writeheader()
        writer.writerows(log)
    print('Logged')