def replay():
    """Replay episodes with a previously trained DQN.

    Builds a simulator with ``show_game=True``, restores the most recent
    checkpoint recorded in the ``model`` directory, and plays MAX_EPISODE
    episodes using only the actions returned by ``brain.get_action()``.
    Each chosen action and each episode's total reward are printed.

    Raises:
        RuntimeError: if no checkpoint is recorded in the ``model`` directory.
    """
    print('dqn_setting')
    sess = tf.Session()

    game = Sim(SCREEN_WIDTH, SCREEN_HEIGHT, show_game=True)
    brain = DQN(sess, VIEW_WIDTH, VIEW_HEIGHT, NUM_ACTION)

    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state('model')
    # get_checkpoint_state() returns None when the directory holds no
    # checkpoint state; fail with a clear message instead of an
    # AttributeError on `None.model_checkpoint_path`.
    if ckpt is None or not ckpt.model_checkpoint_path:
        raise RuntimeError(
            "no checkpoint found in 'model'; run train() first")
    saver.restore(sess, ckpt.model_checkpoint_path)

    # Start the game.
    for episode in range(MAX_EPISODE):
        terminal = False
        total_reward = 0

        state = game.Reset()
        brain.init_state(state)

        while not terminal:
            action = brain.get_action()
            print('action_choice : ' + str(action))

            # Advance the simulator by one step and collect the outcome.
            state, reward, terminal = game.Update(action)
            total_reward += reward

            brain.remember(state, action, reward, terminal)

            # Pause between steps so the play can be watched.
            time.sleep(10)

        print('Number of game: %d Score: %d' % (episode + 1, total_reward))
def train():
    """Train the DQN against the simulator.

    Runs MAX_EPISODE episodes with epsilon-greedy action selection. Along
    the way it:

    * trains the network every TRAIN_INTERVAL steps once ``time_step``
      exceeds OBSERVE,
    * refreshes the target network every TARGET_UPDATE_INTERVAL steps,
    * appends every episode's score to ``ac.txt``,
    * writes the mean episode reward to the ``logs`` summary directory
      every 10 episodes,
    * saves a checkpoint to ``model/dqn.ckpt`` every 100 episodes.
    """
    print('dqn_setting')
    sess = tf.Session()

    game = Sim(SCREEN_WIDTH, SCREEN_HEIGHT)
    brain = DQN(sess, VIEW_WIDTH, VIEW_HEIGHT, NUM_ACTION)

    # Placeholder fed with the recent episode rewards; its mean is the
    # scalar that gets written to the summary log.
    rewards = tf.placeholder(tf.float32, [None])
    tf.summary.scalar('avg.reward/ep.', tf.reduce_mean(rewards))

    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())

    writer = tf.summary.FileWriter('logs', sess.graph)
    summary_merged = tf.summary.merge_all()

    # Initialize the target network.
    brain.update_target_network()

    # Probability of taking the simulator-provided action instead of the
    # DQN's action.
    epsilon = 1.0
    # Number of frames (simulator steps) processed so far.
    time_step = 0
    total_reward_list = []

    for episode in range(MAX_EPISODE):
        terminal = False
        total_reward = 0

        # Reset the game and hand the initial state to the DQN.
        state = game.Reset()
        brain.init_state(state)

        while not terminal:
            # With probability epsilon use game.get_action() (presumably a
            # random/exploratory action -- confirm in Sim); otherwise let
            # the DQN choose.
            if np.random.rand() < epsilon:
                action = game.get_action()
            else:
                action = brain.get_action()

            # Decay epsilon once the episode count passes OBSERVE.
            # `1.0 / 1000` keeps this a true division on Python 2 as well.
            if episode > OBSERVE:
                epsilon -= 1.0 / 1000

            # Advance the game: new state, reward, and game-over flag.
            state, reward, terminal = game.Update(action)
            total_reward += reward

            # Store this transition in the DQN's memory.
            brain.remember(state, action, reward, terminal)

            # After the observation period, train at a fixed interval.
            if time_step > OBSERVE and time_step % TRAIN_INTERVAL == 0:
                brain.train()

            # Refresh the target network at a fixed interval.
            if time_step % TARGET_UPDATE_INTERVAL == 0:
                brain.update_target_network()

            time_step += 1

        print('Number of game: %d Score: %d' % (episode + 1, total_reward))

        # 'rw' is not a valid open() mode; append so that every episode's
        # score is kept, and let `with` close the file even on error.
        with open('ac.txt', 'a') as f:
            f.write('Number of game: %d Score: %d ::\n'
                    % (episode + 1, total_reward))

        total_reward_list.append(total_reward)

        if episode % 10 == 0:
            summary = sess.run(summary_merged,
                               feed_dict={rewards: total_reward_list})
            writer.add_summary(summary, time_step)
            total_reward_list = []

        if episode % 100 == 0:
            saver.save(sess, 'model/dqn.ckpt', global_step=time_step)

    # Flush any buffered summaries to disk before returning.
    writer.close()