Esempio n. 1
0
def train():
    """Train the DQN agent on the game, logging and checkpointing as it goes.

    Creates a TensorFlow session, a ``Game`` (with ``show_game=False``) and a
    ``DQN`` agent, then plays ``maxEpisode`` episodes using an epsilon-greedy
    policy.  Every transition is passed to ``brain.remember``.  Once
    ``timeStep`` exceeds ``observe``, ``brain.train`` runs every
    ``trainInterval`` steps; the target network is refreshed every
    ``targetUpdateInterval`` steps.

    Side effects:
        * Writes TensorBoard summaries to ``logs`` every 10 episodes.
        * Saves a checkpoint to ``./model/dqn.ckpt`` every 100 episodes.
        * Prints progress to stdout.

    The session and the summary writer are closed when the function exits,
    whether training finishes or raises.
    """
    print('뇌세포 깨우는 중..')
    sess = tf.Session()
    writer = None

    try:
        game = Game(screenWidth, screenHeight, show_game=False)
        brain = DQN(sess, screenWidth, screenHeight, numAction)

        # Fed with the list of per-episode totals; the scalar summary is
        # their mean.
        rewards = tf.placeholder(tf.float32, [None])
        tf.summary.scalar('avg.reward/ep.', tf.reduce_mean(rewards))

        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter('logs', sess.graph)
        summaryMerged = tf.summary.merge_all()

        brain.updateTargetNetwork()

        # Exploration rate.  It is initialised once, outside the episode
        # loop, so that the decay below carries over from one episode to the
        # next; resetting it per episode would keep the policy almost fully
        # random for the whole run.
        epsilon = 1.0
        timeStep = 0
        totalRewardList = []

        for episode in range(maxEpisode):
            terminal = False
            totalReward = 0

            state = game.reset()
            brain.initState(state)

            while not terminal:
                # Epsilon-greedy: explore with probability epsilon,
                # otherwise take the action chosen by the network.
                if np.random.rand() < epsilon:
                    action = random.randrange(numAction)
                else:
                    action = brain.getAction()

                # Start annealing exploration only after the first
                # ``observe`` episodes.
                if episode > observe:
                    epsilon -= 0.001

                state, reward, terminal = game.step(action)
                totalReward += reward
                brain.remember(state, action, reward, terminal)

                if timeStep > observe and timeStep % trainInterval == 0:
                    brain.train()
                if timeStep % targetUpdateInterval == 0:
                    brain.updateTargetNetwork()

                timeStep += 1

            totalRewardList.append(totalReward)

            if episode % 10 == 0:
                # The list is never cleared, so the logged mean covers every
                # episode played so far.
                summary = sess.run(summaryMerged,
                                   feed_dict={rewards: totalRewardList})
                writer.add_summary(summary, timeStep)
            if episode % 100 == 99:
                print("게임횟수 : {0}, 점수 : {1:.4f}".format(episode + 1, totalReward))
                saver.save(sess, './model/dqn.ckpt', global_step=timeStep)
    finally:
        # Flush pending summary events and release the session's resources.
        if writer is not None:
            writer.close()
        sess.close()
Esempio n. 2
0
def replay():
    """Restore the latest checkpoint and let the trained agent play on screen.

    Creates a TensorFlow session, a ``Game`` (with ``show_game=True``) and a
    ``DQN`` agent, restores the most recent checkpoint found in the ``model``
    directory, and plays ``maxEpisode`` episodes using only
    ``brain.getAction`` (no random exploration).  Each step is followed by a
    0.3 second pause, and the episode score is printed when it ends.

    Raises:
        FileNotFoundError: if no checkpoint is found in ``model``.

    The session is closed when the function exits, whether it finishes or
    raises.
    """
    print('뇌세포 깨우는 중..')
    sess = tf.Session()

    try:
        game = Game(screenWidth, screenHeight, show_game=True)
        brain = DQN(sess, screenWidth, screenHeight, numAction)

        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state('model')
        # get_checkpoint_state returns None when the directory holds no
        # checkpoint; fail with a clear message instead of an AttributeError.
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise FileNotFoundError(
                "No checkpoint found in 'model'; run train() first.")
        saver.restore(sess, ckpt.model_checkpoint_path)

        for episode in range(maxEpisode):
            terminal = False
            totalReward = 0

            state = game.reset()
            brain.initState(state)

            while not terminal:
                action = brain.getAction()
                state, reward, terminal = game.step(action)
                totalReward += reward
                # getAction() takes no arguments, so the new observation has
                # to be handed to the agent here, exactly as train() does;
                # otherwise every action would be based on the initial state.
                brain.remember(state, action, reward, terminal)
                time.sleep(0.3)
            print("게임횟수 : {0}, 점수 : {1}".format(episode + 1, totalReward))
    finally:
        sess.close()