예제 #1
0
                ms, ma = env_model.sample_s_a()  # ms in here is a str
                mr, ms_ = env_model.get_r_s_(ms, ma)
                RL.learn(ms, ma, mr, str(ms_))

                # print(env_model.database)
                # print('################')
                # print(RL.q_table)
                # print('################')
            s = s_
            s2 = s2_
            if done:
                s = env.reset()
                break

            if done2:
                s2 = env.reset2()
                break

    # end of game
    print('game over')
    print(RL.q_table)
    env.destroy()


if __name__ == "__main__":
    # Script entry point: create the maze, then the environment model and
    # the learner, both of which are handed one action id per maze action.
    env = Maze()
    n_actions = env.n_actions
    env_model = EnvModel(actions=list(range(n_actions)))
    RL = Learning(actions=list(range(n_actions)))

    # Hand `update` to the environment's scheduler with a zero delay and
    # then enter its main loop (presumably a tkinter window -- confirm
    # against the Maze class).
    env.after(0, update)
    env.mainloop()
예제 #2
0
파일: play.py 프로젝트: jlinbb/dqn_demo
from env import Maze
from q_learning import QLearning


def update():
    """Train the learner for 20 episodes on the maze.

    Reads the module-level ``env`` and ``RL`` objects.  Each episode resets
    the environment and then repeatedly renders, picks an action for the
    stringified state, steps the environment and feeds the resulting
    transition to ``RL.learn`` until ``env.step`` reports ``done``.  A
    one-line summary (episode index and number of steps) is printed after
    every episode.
    """
    for episode in range(20):
        current = env.reset()
        steps_taken = 0

        # At least one step is always taken; the loop ends on the step for
        # which the environment reports the episode as finished.
        while True:
            env.render()
            chosen = RL.choose_action(str(current))
            following, reward, finished = env.step(chosen)
            steps_taken += 1

            # States are passed to the learner in their string form.
            RL.learn(str(current), chosen, reward, str(following))
            current = following

            if finished:
                break

        summary = ' Round over at: {0} round, Total steps: {1} steps'.format(
            episode, steps_taken)
        print(summary)


if __name__ == '__main__':
    env = Maze()
    # update() looks the learner up through the module-level name ``RL``;
    # binding it under any other name makes the first training step fail
    # with a NameError.
    RL = QLearning(actions=list(range(env.n_actions)))

    # The Tk main loop is deliberately not started in this script
    # (env.mainloop() stays disabled), so a callback registered with
    # env.after() would never fire.  Run the training loop synchronously
    # instead of calling it inside the after() argument list, which only
    # worked by accident and handed after() a None callback.
    update()

    # Dump the learned Q-table to stdout and to a CSV file.
    print('\n Q Table')
    print(RL.q_table)
    RL.q_table.to_csv('Q_Table.csv')
예제 #3
0
            state_, reward, done = env.step(action)

            step_count += 1  # 增加步数

            # 机器人大脑从这个过渡(transition) (state, action, reward, state_) 中学习
            RL.learn(str(state), action, reward, str(state_))

            # 机器人移动到下一个 state
            state = state_

            # 如果踩到炸弹或者找到宝藏, 这回合就结束了
            if done:
                print("回合 {} 结束. 总步数 : {}\n".format(episode + 1, step_count))
                break

    # 结束游戏并关闭窗口
    print('游戏结束')
    env.destroy()


if __name__ == "__main__":
    # Create the environment and the Q-learning agent; the agent gets one
    # action id per action the maze exposes.
    env = Maze()
    action_ids = list(range(env.n_actions))
    RL = QLearning(actions=action_ids)

    # Start the visual environment: schedule `update` after 100 ms and
    # enter the main loop.
    env.after(100, update)
    env.mainloop()

    # Once the main loop has returned, show the learned Q-table.
    print('\nQ 表:')
    print(RL.q_table)
예제 #4
0
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            ## break while loop when end of this episode
            #if done:
            #break
            step += 1
            time.sleep(60)


if __name__ == "__main__":
    # Maze game: build the environment, then a DQN agent sized from the
    # environment's action and feature counts.
    env = Maze()

    # Keyword hyper-parameters forwarded unchanged to DeepQNetwork.
    hyperparams = {
        'learning_rate': 0.01,
        'reward_decay': 0.9,
        'e_greedy': 0.9,
        'replace_target_iter': 200,
        'memory_size': 2000,
        # 'output_graph': True,
    }
    RL = DeepQNetwork(env.n_actions, env.n_features, **hyperparams)

    # Schedule the training routine after 100 ms and enter the main loop;
    # the cost curve is plotted once the loop has returned.
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()