Example #1
You will see that Sarsa is more cautious when the punishment is close, because
it accounts for every action the policy might actually take, while Q-learning
is bolder because it only cares about the maximum-value action.
"""

import sys
import os
sys.path.append(os.getcwd())

import pandas as pd
import time
from maze_env import Maze
from RL_brain import SarsaLambdaTable

if __name__ == '__main__':
    env = Maze()
    RL = SarsaLambdaTable(actions=env.action_space)

    def update():
        log = []
        for episode in range(100):
            s = env.reset()
            a = RL.choose_action(str(s))
            RL.eligibility_trace *= 0  # clear the eligibility trace at the start of each episode
            step_count = 0
            done = False
            r = 0
            while not done:
                env.render()
                s_, r, done = env.step(a)
                a_ = RL.choose_action(str(s_))
                # learn from (s, a, r, s', a'), then advance to the next step
                RL.learn(str(s), a, r, str(s_), a_)
                s, a = s_, a_
                step_count += 1
            log.append(step_count)  # record how many steps the episode took
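The RL.eligibility_trace *= 0 reset matters because Sarsa(lambda) spreads each
TD error backwards over recently visited state-action pairs, and credit from
one episode must not leak into the next. A minimal sketch of the backward-view
update that SarsaLambdaTable is assumed to implement internally (illustrative
names and array layout, not the repo's exact API):

import numpy as np

def sarsa_lambda_update(q, trace, s, a, r, s_, a_, done,
                        alpha=0.01, gamma=0.9, lam=0.9):
    # q and trace are (n_states, n_actions) arrays; states are integer indices.
    target = r if done else r + gamma * q[s_, a_]
    delta = target - q[s, a]    # TD error of the transition just taken
    trace[s, :] = 0.0           # replacing trace: zero this state's other actions
    trace[s, a] = 1.0           # fully credit the visited (state, action) pair
    q += alpha * delta * trace  # nudge every recently visited pair by its credit
    trace *= gamma * lam        # decay all traces so older visits earn less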
Example #2
            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            # RL choose action based on next observation
            action_ = RL.choose_action(str(observation_))

            # RL learn from this transition (s, a, r, s', a') ==> Sarsa
            RL.learn(str(observation), action, reward, str(observation_),
                     action_)

            # swap observation and action
            observation = observation_
            action = action_

            # break while loop when end of this episode
            if done:
                break

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = SarsaLambdaTable(actions=list(range(env.n_actions)))

    env.after(100, update)  # schedule training 100 ms after the Tk loop starts
    env.mainloop()          # hand control to the Tkinter event loop
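Both examples assume the Maze environment from maze_env exposes reset(),
step(), and render(), plus Tkinter's after() and mainloop(). A hypothetical
minimal stand-in with toy dynamics (not the repo's class, which draws an
actual maze on a canvas) that lets the snippets run in isolation:

import tkinter as tk

class MazeStub(tk.Tk):
    def __init__(self):
        super().__init__()
        self.n_actions = 4                        # up, down, left, right
        self.action_space = ['u', 'd', 'l', 'r']
        self._pos = 0

    def reset(self):
        self._pos = 0
        return self._pos                          # observation: cell index

    def step(self, action):
        self._pos += 1                            # toy dynamics: always advance
        done = self._pos >= 10
        reward = 1 if done else 0                 # reward only at the goal cell
        return self._pos, reward, done

    def render(self):
        pass                                      # real Maze redraws its canvas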
Example #3
            # RL choose action based on next observation
            action_ = RL.choose_action(str(observation_))

            # RL learn from this transition (s, a, r, s', a') ==> Sarsa
            RL.learn(str(observation), action, reward, str(observation_),
                     action_)

            # swap observation and action
            observation = observation_
            action = action_

            # break while loop when end of this episode
            if done:
                # persist the Q-table at the end of the episode if enabled
                if RL.read_save:
                    RL.save_q_table()
                break

    # end of game
    print('game over')
    print("Q_table:\n", RL.q_table)
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    read_save = True  # assumed flag value: enable loading/saving the Q-table
    RL = SarsaLambdaTable(actions=list(range(env.n_actions)),
                          read_save=read_save)
    env.after(100, update)
    env.mainloop()
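This example relies on read_save and save_q_table(), which live in this repo's
RL_brain rather than in the snippet. A hypothetical sketch of how such
persistence could look with pandas (the helper names and file path are
assumptions; only to_pickle/read_pickle are standard pandas calls):

import os
import pandas as pd

Q_TABLE_PATH = "q_table.pkl"  # assumed file name, not taken from the repo

def save_q_table(q_table):
    # Serialise the whole DataFrame (index = states, columns = actions).
    q_table.to_pickle(Q_TABLE_PATH)

def load_q_table(actions):
    # Restore a previous run's table, or start with an empty one.
    if os.path.exists(Q_TABLE_PATH):
        return pd.read_pickle(Q_TABLE_PATH)
    return pd.DataFrame(columns=actions, dtype=float)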