Code example #1

import pickle

import numpy as np
import matplotlib.pyplot as plt

# Maze, DuelingDQN and run_maze are assumed to come from the
# accompanying environment/agent modules of this example.

if __name__ == "__main__":
    env = Maze()
    RL = DuelingDQN(
        env.n_actions,
        env.n_features,  # number of observation/state features, e.g. length/width/height
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        dueling=True,  # use the dueling network head (see the sketch after this example)
        replace_target_iter=200,  # sync target_net parameters every 200 learning steps
        memory_size=2000,  # replay-memory capacity
        # output_graph=True,  # whether to write a TensorBoard log file
    )
    env.after(100, run_maze)  # start the RL training loop (a sketch of run_maze follows this example)
    env.mainloop()
    # plot the training-time curve
    # (episodes and steps are lists assumed to be populated inside run_maze)
    his_dueling = np.vstack((episodes, steps))
    with open('his_dueling', 'wb') as file:
        pickle.dump(his_dueling, file)

    plt.plot(his_dueling[0, :],
             his_dueling[1, :] - his_dueling[1, 0],
             c='b',
             label='Dueling DQN')
    plt.legend(loc='best')  # 'best' lets matplotlib place the legend automatically
    plt.ylabel('total training time')
    plt.xlabel('episode')
    plt.grid()  # show grid lines
    plt.show()
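
The dueling=True flag above selects the dueling network head, which estimates a
state value V(s) and per-action advantages A(s, a) separately and recombines
them into Q-values. A minimal NumPy sketch of that aggregation step, with
illustrative shapes and numbers (the tutorial's actual network layers are not
shown here):

import numpy as np

def dueling_q_values(value, advantages):
    # Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a));
    # subtracting the mean advantage keeps V and A identifiable
    return value + (advantages - advantages.mean(axis=-1, keepdims=True))

v = np.array([[1.5]])                   # V(s), shape (batch, 1)
a = np.array([[0.2, -0.1, 0.4, -0.5]])  # A(s, a), shape (batch, n_actions)
print(dueling_q_values(v, a))           # [[1.7  1.4  1.9  1. ]]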
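
The plotting code reads episodes and steps, which must be filled in during
training. A sketch of what run_maze could look like under that assumption; the
episode budget, the learning warm-up threshold, and the store_transition/learn
interface follow the usual DQN-tutorial pattern and may differ from the source:

episodes, steps = [], []  # read by the plotting code above

def run_maze():
    total_steps = 0
    for episode in range(300):  # episode budget is illustrative
        observation = env.reset()
        while True:
            env.render()
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            RL.store_transition(observation, action, reward, observation_)
            if total_steps > 200:  # start learning once memory has warmed up
                RL.learn()
            observation = observation_
            total_steps += 1
            if done:
                break
        episodes.append(episode)
        steps.append(total_steps)  # cumulative steps as a proxy for training time
    env.destroy()  # closes the window so mainloop() returns and plotting runs
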
Code example #2
def update():
    for episode in range(100):  # number of training episodes (illustrative)
        # initial observation
        observation = env.reset()

        # RL choose action based on the current observation
        action = RL.choose_action(str(observation))

        while True:
            # refresh the rendered environment
            env.render()

            # RL take action and get next observation and reward
            observation_, reward, done = env.step(action)

            # RL choose action based on next observation
            action_ = RL.choose_action(str(observation_))

            # RL learn from this transition (s, a, r, s', a') ==> Sarsa
            RL.learn(str(observation), action, reward, str(observation_),
                     action_)

            # swap observation and action
            observation = observation_
            action = action_

            # break while loop when end of this episode
            if done:
                break

    # end of game
    print('game over')
    env.destroy()


# Maze and SarsaTable are assumed to come from the accompanying
# environment/agent modules of this example.
if __name__ == "__main__":
    env = Maze()
    RL = SarsaTable(actions=list(range(env.n_actions)))

    env.after(100, update)
    env.mainloop()
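
For reference, SarsaTable.learn applies the on-policy Sarsa update
Q(s, a) <- Q(s, a) + alpha * [r + gamma * Q(s', a') - Q(s, a)] to each
transition. A minimal pandas-based sketch of such a table, assuming the
constructor signature used above and a terminal state reported as the string
'terminal' (the tutorial's actual class may differ in details):

import numpy as np
import pandas as pd

class SarsaTable:
    def __init__(self, actions, learning_rate=0.01,
                 reward_decay=0.9, e_greedy=0.9):
        self.actions = actions
        self.lr = learning_rate
        self.gamma = reward_decay
        self.epsilon = e_greedy
        self.q_table = pd.DataFrame(columns=self.actions, dtype=np.float64)

    def check_state_exist(self, state):
        # lazily add unseen states as zero-valued rows
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * len(self.actions)

    def choose_action(self, state):
        self.check_state_exist(state)
        if np.random.uniform() < self.epsilon:
            # exploit: pick a best action, breaking ties randomly
            row = self.q_table.loc[state]
            return np.random.choice(row[row == row.max()].index)
        return np.random.choice(self.actions)  # explore

    def learn(self, s, a, r, s_, a_):
        self.check_state_exist(s_)
        q_predict = self.q_table.loc[s, a]
        if s_ != 'terminal':  # assumes the env names its end state 'terminal'
            # on-policy target: uses the action a_ actually taken next
            q_target = r + self.gamma * self.q_table.loc[s_, a_]
        else:
            q_target = r
        self.q_table.loc[s, a] += self.lr * (q_target - q_predict)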