state = next_state
step += 1

if step > config.start_train_step and train_mode:
    agent.epsilon = 0

    # Perform a training step
    loss, maxQ, r_i, loss_rl, loss_fm = agent.train_model_RND()
    loss_list.append(loss)
    max_Q_list.append(maxQ)
    r_i_list.append(r_i)
    loss_rl_list.append(loss_rl)
    loss_fm_list.append(loss_fm)

    # Update the target network
    if step % config.target_update_step == 0:
        agent.update_target()

# Save the network model
if step % config.save_step == 0 and step != 0 and train_mode:
    agent.save_model(config.load_model, train_mode)

reward_list.append(episode_rewards)
episode += 1

# Print training progress and log rewards and losses to TensorBoard
if episode % config.print_episode == 0 and episode != 0:
    print("step: {} / episode: {} / reward: {:.2f} / loss_tot: {:.4f} / "
          "loss_rl: {:.4f} / loss_fm: {:.6f} / maxQ: {:.2f} / reward_i: {:.6f}"
          .format(step, episode, np.mean(reward_list), np.mean(loss_list),
                  config.lamb * np.mean(loss_rl_list), np.mean(loss_fm_list),
                  np.mean(max_Q_list), np.mean(r_i_list)))
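The loop above calls agent.train_model_RND(), whose body is not shown in this section. Conceptually, Random Network Distillation derives the intrinsic reward r_i from how poorly a trainable predictor network reproduces the output of a frozen, randomly initialized target network on the current state. The following is only a minimal NumPy sketch of that idea; the single-layer "networks", the function name intrinsic_reward_and_update, and the dimensions are hypothetical and are not taken from the original agent code.

import numpy as np

rng = np.random.RandomState(0)
state_dim, feature_dim = 8, 4
W_target = rng.randn(state_dim, feature_dim)   # frozen random features (never trained)
W_pred = np.zeros((state_dim, feature_dim))    # predictor weights (trained below)

def intrinsic_reward_and_update(state, lr=0.01):
    global W_pred
    target = state @ W_target                  # output of the fixed random network
    pred = state @ W_pred                      # predictor's estimate of that output
    err = pred - target
    r_i = float(np.mean(err ** 2))             # novelty signal: large on rarely seen states
    # One SGD step on the predictor loss, analogous to loss_fm in the loop above
    W_pred = W_pred - lr * np.outer(state, 2.0 * err / feature_dim)
    return r_i

# Repeating the same state drives its prediction error, and hence r_i, toward zero
s = rng.randn(state_dim)
print([round(intrinsic_reward_and_update(s), 4) for _ in range(3)])

This shrinking error on familiar states is presumably why the loop can set agent.epsilon = 0 once training starts: exploration is then driven by the intrinsic bonus rather than by random actions.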
action="store_false", default=True, help="Do not perform the testing phase") parser.add_option("-V", "--video-record", dest="record", action="store", default="video.mp4", type="string", help="Records") options, args = parser.parse_args() with tf.Session() as sess: from agent import (init_phase, bootstrap_phase, learning_phase, testing_phase, update_target, make_video) init_phase(sess) if options.bootstrap: bootstrap_phase(sess) update_target(sess) if options.learning: learning_phase(sess) if options.testing: testing_phase(sess) if options.record: make_video(sess, "videos/final.mp4", 15)