# Training driver: repeatedly asks the agent for an action, applies it through
# the Control wrapper, stores the resulting transition, and trains the agent
# once its replay memory counter exceeds Memory_capacity.
Op = Control()
print('\n collecting experience...')

# Resume from an existing checkpoint file when one is present.
if os.path.exists('./model/model_gpu_dqn.check'):
    dqn.load()

total_step = 0
for i_episode in range(1000):
    Op.getstate()
    while True:
        action = dqn.choose_action(Op.state)
        # Execute the action.
        state_next, reward, terminal = Op.action(action)
        if terminal:
            # NOTE(review): the terminal transition (and its reward) is never
            # stored, so the agent cannot learn from it directly. Left as-is
            # because it may be deliberate -- confirm against Control.action.
            break
        dqn.store_transition(Op.state, action, reward, state_next)

        # Only start learning once memory_counter exceeds Memory_capacity.
        if dqn.memory_counter > Memory_capacity:
            dqn.learn()
            # NOTE(review): "Ep_r" shows the reward of the latest step, not an
            # accumulated episode return.
            print(
                f'Ep:{i_episode} | Ep_r:{round(reward,3)} | total_step:{total_step}'
            )

        # Hard stop: persist the model and exit after 50000 completed steps.
        if total_step == 50000:
            dqn.save()
            sys.exit()

        # Increment the total number of executed steps.
        total_step += 1
        # Advance to the next state.
        Op.state = state_next

    # Periodic checkpoint, written once per qualifying episode rather than on
    # every step of that episode.
    if i_episode % 50 == 0:
        dqn.save()