Exemple #1
0
# Training script: drive the environment wrapper `Op` with the DQN agent
# `dqn`, store transitions, and train once the replay memory is full.
Op = Control()
print('\n collecting experience...')

# Resume from an existing checkpoint when one is present on disk.
# NOTE(review): assumes dqn.load() reads this same path — confirm.
if os.path.exists('./model/model_gpu_dqn.check'):
    dqn.load()
total_step = 0
for i_episode in range(1000):
    # NOTE(review): presumably refreshes Op.state with the episode's
    # initial observation — confirm against Control.getstate().
    Op.getstate()
    while True:
        action = dqn.choose_action(Op.state)
        # Execute the action
        state_next, reward, terminal = Op.action(action)
        if terminal:
            # The terminal step is neither stored in the replay memory
            # nor counted in total_step.
            break
        dqn.store_transition(Op.state, action, reward, state_next)
        if dqn.memory_counter > Memory_capacity:
            dqn.learn()
            # Note: the value printed as "Ep_r" is the reward of the
            # current step, not an accumulated episode return.
            print(
                f'Ep:{i_episode} | Ep_r:{round(reward,3)} | total_step:{total_step}'
            )
            # Use >= so the step budget still ends the run when the replay
            # memory only fills up after total_step has passed 50000.
            if total_step >= 50000:
                dqn.save()
                sys.exit()

        # Increment the total number of executed steps
        total_step += 1
        # Advance to the next state
        Op.state = state_next

    # Periodic checkpoint: once per episode, every 50th episode.
    # (Previously this ran inside the step loop and saved on every step
    # of those episodes.)
    if i_episode % 50 == 0:
        dqn.save()