Esempio n. 1
0
            # Carry the new state into the next iteration and bump the step counter.
            state = next_state
            step += 1

            # Train only in train mode, and only once the step counter has
            # passed config.start_train_step.
            if step > config.start_train_step and train_mode:
                # NOTE(review): epsilon is forced to 0 on every training step;
                # presumably this switches off epsilon-greedy exploration once
                # training starts -- confirm against the agent's action selection.
                agent.epsilon = 0
                # Run one training update
                loss, maxQ, r_i, loss_rl, loss_fm = agent.train_model_RND()
                # Accumulate the metrics returned by the update in their lists.
                loss_list.append(loss)
                max_Q_list.append(maxQ)
                r_i_list.append(r_i)
                loss_rl_list.append(loss_rl)
                loss_fm_list.append(loss_fm)

                # Update the target network every config.target_update_step steps
                if step % (config.target_update_step) == 0:
                    agent.update_target()

            # Save the network model every config.save_step steps (train mode
            # only; step 0 is skipped).
            # NOTE(review): the first argument is config.load_model -- verify
            # that this is what save_model expects.
            if step % config.save_step == 0 and step != 0 and train_mode:
                agent.save_model(config.load_model, train_mode)

        reward_list.append(episode_rewards)
        episode += 1

        # Print game progress and record reward and loss values to TensorBoard
        if episode % config.print_episode == 0 and episode != 0:
            print(
                "step: {} / episode: {} / reward: {:.2f} / loss_tot: {:.4f} / loss_rl: {:.4f} / loss_fm: {:.6f} / maxQ: {:.2f} / reward_i: {:.6f}"
                .format(step, episode,
                        np.mean(reward_list), np.mean(loss_list),
                        config.lamb * np.mean(loss_rl_list),
Esempio n. 2
0
                      action="store_false",
                      default=True,
                      help="Do not perform the testing phase")
    # -V/--video-record: path of the video file to record at the end of the
    # run. The default is the path that used to be hard-coded in the
    # make_video() call, so a plain invocation still writes to the same file.
    # Passing an empty string disables recording.
    parser.add_option("-V",
                      "--video-record",
                      dest="record",
                      action="store",
                      default="videos/final.mp4",
                      type="string",
                      help="Records")
    options, args = parser.parse_args()

    with tf.Session() as sess:
        # The agent module is imported only after the session exists, as in
        # the original code.
        # NOTE(review): presumably the import has side effects that need an
        # active session/graph -- confirm before hoisting it to file level.
        from agent import (init_phase, bootstrap_phase, learning_phase,
                           testing_phase, update_target, make_video)
        init_phase(sess)

        # Optional bootstrap phase, controlled by options.bootstrap.
        if options.bootstrap:
            bootstrap_phase(sess)

        # The target update always runs, after the optional bootstrap and
        # before learning.
        update_target(sess)

        if options.learning:
            learning_phase(sess)

        if options.testing:
            testing_phase(sess)

        # Record to the path given with -V instead of ignoring it; the old
        # code only tested the option for truthiness and always wrote to
        # "videos/final.mp4".
        # NOTE(review): the meaning of the third argument (15) is not visible
        # from here.
        if options.record:
            make_video(sess, options.record, 15)