latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint:
    print("Loading model checkpoint {}...\n".format(latest_checkpoint))
    # saver.restore(sess, latest_checkpoint)
try:
    total_t = sess.run(tf.train.get_global_step())
except Exception:
    total_t = 0

# Linear epsilon decay schedule for the epsilon-greedy policies.
epsilon_start = 1.0
epsilon_end = 0.1
epsilon_decay_steps = 500000
epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

policy_angle = dqn_utils.make_epsilon_greedy_policy(
    angle_estimator, len(valid_angles))
policy_taptime = dqn_utils.make_epsilon_greedy_policy(
    taptime_estimator, len(valid_taptimes))

########################################
##### Populating replay memory (size: N)
# Ideally we would fill the replay memory with N random shots, but...
# instead, for each level, we use data from shots fired at 0 to 90 degrees.
# Also consider pre-training: start from weights learned on this replay memory.
Transition = namedtuple(
    "Transition", ["state", "action", "reward", "next_state", "game_state"])
replay_memory_size = 500000

print('Populating replay memory...')
replay_memory = []
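# --- Illustrative sketch (not part of the original source) ---
# dqn_utils.make_epsilon_greedy_policy is called above but not defined in this
# section. Assuming the estimator exposes a predict(sess, states) method that
# returns one row of Q-values per state (an assumption, not confirmed by the
# source), a minimal version for the scalar num_actions case used by
# policy_angle / policy_taptime could look like:
def make_epsilon_greedy_policy(estimator, num_actions):
    """Return policy_fn(sess, state, epsilon) -> action probabilities."""
    def policy_fn(sess, state, epsilon):
        # Spread epsilon probability mass uniformly over all actions...
        action_probs = np.ones(num_actions, dtype=float) * epsilon / num_actions
        q_values = estimator.predict(sess, np.expand_dims(state, 0))[0]
        best_action = np.argmax(q_values)
        # ...and give the remaining (1 - epsilon) mass to the greedy action.
        action_probs[best_action] += 1.0 - epsilon
        return action_probs
    return policy_fn

# Example use: sample an action with the epsilon for the current step.
# action_probs = policy_angle(sess, state, epsilons[min(total_t, epsilon_decay_steps - 1)])
# action = np.random.choice(len(valid_angles), p=action_probs)
#
# The list-valued variant used below (one policy over [angles, taptimes])
# would need a per-dimension extension of this sketch.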
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)  # returns the checkpoint path
if latest_checkpoint:
    print("Loading model checkpoint {}...\n".format(latest_checkpoint))
    saver.restore(sess, latest_checkpoint)
total_t = sess.run(tf.train.get_global_step())  # this failed at first because the global_step tensor variable had not been created

## user parameters
epsilon_start = 1.0
epsilon_end = 0.1
epsilon_decay_steps = 500000
epsilons = np.linspace(epsilon_start, epsilon_end, epsilon_decay_steps)

policy = dqn_utils.make_epsilon_greedy_policy(
    estimator, [len(valid_angles), len(valid_taptimes)])

########################################
##### Populating replay memory (size: N)
# Ideally we would fill the replay memory with N random shots, but...
# instead, for each level, we use data from shots fired at 0 to 90 degrees.
batch_size = 6
discount_factor = 0.99

Transition = namedtuple(
    "Transition", ["state", "action", "reward", "next_state", "game_state"])
replay_memory_size = 500000

print('Populating replay memory...')
# replay_memory = []  # doesn't pretrain_memory effectively become the replay memory?
# with open(os.path.join(EXP_PATH, 'pretrain_memory_5'), 'rb') as f:
with open(os.path.join(EXP_PATH, 'replay_memoryAll'), 'rb') as f: