Code example #1
            observation = observation_

            # break out of the loop when the episode terminates
            if done:
                steps.append(step)
                episodes.append(episode)
                break
            step += 1   # total step count

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = DoubleDQN(env.n_actions,
                   env.n_features,  # number of observation/state features (e.g. length, width, height)
                   learning_rate=0.01,
                   reward_decay=0.9,
                   dueling=True,
                   e_greedy=0.9,
                   replace_target_iter=200,  # replace the target_net parameters every 200 steps
                   memory_size=2000,  # replay memory capacity
                   # output_graph=True   # whether to write a TensorBoard file
                   )
    env.after(100, run_maze)  # start the reinforcement-learning training
    env.mainloop()

    # inspect the training-time curve
    his_double = np.vstack((episodes, steps))
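
The stacked his_double array holds the episode indices in row 0 and the accumulated step counts in row 1. A minimal plotting sketch for that curve (not part of the original listing; it assumes matplotlib is available) could look like:

import matplotlib.pyplot as plt

# row 0: episode indices, row 1: total steps taken up to that episode
plt.plot(his_double[0, :], his_double[1, :], c='b', label='Double DQN')
plt.xlabel('episode')
plt.ylabel('total training steps')
plt.legend(loc='best')
plt.grid(True)
plt.show()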
Code example #2
from agent import Agent, D_Q_Agent
import time

maze = '2'

if maze == '1':
    from maze_env1 import Maze
elif maze == '2':
    from maze_env2 import Maze

if __name__ == "__main__":
    ### START CODE HERE ###
    # This is an agent with a random policy. You can learn how to interact with the environment through the code below.
    # You can then delete it and write your own code.

    env = Maze()
    training_epoch = 100 if maze == '1' else 1000
    agent = D_Q_Agent(training_epoch)

    for episode in range(training_epoch):
        agent.if_rewarded = False
        s = env.reset()
        while True:
            # env.render()                 # Comment out all render() calls to turn off the graphical interface during training and speed up your code.
            chosen_direction = agent.choose_action(s, episode)
            s_, r, done = env.step(chosen_direction)
            agent.update_Q_value(s, chosen_direction, r)
            if s_[-1]:
                agent.if_rewarded = True
                agent.if_rewarded_in_the_whole_training = True
            s = s_