# NOTE(review): this section looks like a near-duplicate of the setup that
# follows it (same constants, a second `global_step`, overlapping estimator
# scopes) — confirm which copy is actually live before shipping.
# valid_angles = list(range(5, 86, 5))  # 5 to 85 degrees in 5-degree steps
# valid_taptimes = list(range(500, 2501, 100))  # 500 to 2500 in steps of 100
# valid_angles = [8, 10, 11, 14, 17, 18, 19, 20, 21, 22, 23, 26, 30, 31, 34, 35, 36, 46, 61, 65, 67, 70]  # excludes 55

# Discrete action space for the shot angle (degrees). Hand-picked values
# rather than a uniform grid — presumably tuned empirically; TODO confirm.
valid_angles = [
    8, 14, 17, 18, 19, 20, 21, 22, 23, 24, 26, 30, 31, 34, 35, 36, 40, 46,
    50, 55, 61, 65, 67, 70, 75
]
# Discrete action space for the tap (release) time — presumably milliseconds;
# verify against the environment's units.
valid_taptimes = [
    600, 700, 900, 1000, 1100, 1200, 1300, 1500, 1600, 1700, 1800, 2000, 2500
]

# Create a global step variable (non-trainable counter shared by the
# estimators' training ops).
global_step = tf.Variable(0, name='global_step', trainable=False)

# Online and target Q-networks for the angle action. Only the online
# network writes TensorBoard summaries (summaries_dir).
angle_estimator = q_network.DQN_Estimator(scope="angle_estimator",
                                          output_size=len(valid_angles),
                                          summaries_dir=SUMM_PATH)
angle_target_estimator = q_network.DQN_Estimator(
    scope="angle_target_estimator", output_size=len(valid_angles))

# Online and target Q-networks for the tap-time action. `angle_as_state`
# feeds the chosen angle (one-hot sized len(valid_angles), presumably) into
# the tap-time network's state — confirm against q_network.DQN_Estimator.
taptime_estimator = q_network.DQN_Estimator(scope="taptime_estimator",
                                            output_size=len(valid_taptimes),
                                            summaries_dir=SUMM_PATH,
                                            angle_as_state=len(valid_angles))
taptime_target_estimator = q_network.DQN_Estimator(
    scope="taptime_target_estimator",
    output_size=len(valid_taptimes),
    angle_as_state=len(valid_angles))

# angle_estimator, angle_target_estimator = DQN_Estimator(obs_size, sess, fe, sc_parser, "angle", valid_angles)  # needs rework
# taptime_estimator, taptime_target_estimator = DQN_Estimator(obs_size, sess, fe, sc_parser, "taptime", valid_taptimes)  # needs rework

# Keeps track of useful statistics
# valid_angles = list(range(5, 86, 5)) # 5도부터 85도까지 5도씩 증가 # valid_angles = dqn_utils.get_valid_angles() # valid_taptimes = list(range(500, 2501, 100)) # 500부터 2500까지 100씩 증가 valid_angles = [8, 14, 17, 18, 19, 20, 21, 22, 23, 24, 26, 30, 31, 34, 35, 36, 40, 46, 50, 55, 61, 65, 67, 70, 75] # [8, 10, 11, 14, 17, 18, 19, 20, 21, 22, 23, 26, 30, 31, 34, 35, 36, 46, 61, 65, 67, 70] # 55제외 valid_taptimes = [600, 700, 900, 1000, 1100, 1200, 1300, 1500, 1600, 1700, 1800, 2000, 2500] # Create a global step variable # 일단 시작은 0이고, checkpoint를 불러오면 저장된 global_step이 들어오는건가... global_step = tf.Variable(0, name='global_step', trainable=False) # initialize Q-network if model_type == 1: estimator = q_network.DQN_Estimator(scope="estimator", angle_output_size=len(valid_angles), taptime_output_size=len(valid_taptimes)) target_estimator = q_network.DQN_Estimator(scope="target_estimator", angle_output_size=len(valid_angles), taptime_output_size=len(valid_taptimes)) elif model_type == 2: angle_estimator = q_network_parallelNN.DQN_Estimator(scope="angle_estimator", output_size=len(valid_angles)) angle_target_estimator = q_network_parallelNN.DQN_Estimator(scope="angle_target_estimator", output_size=len(valid_angles)) taptime_estimator = q_network_parallelNN.DQN_Estimator(scope="taptime_estimator", output_size=len(valid_taptimes), angle_as_state = len(valid_angles)) taptime_target_estimator = q_network_parallelNN.DQN_Estimator(scope="taptime_target_estimator", output_size=len(valid_taptimes), angle_as_state = len(valid_angles)) # Keeps track of useful statistics EpisodeStats = namedtuple("Stats",["episode_lengths", "episode_rewards"]) stats = EpisodeStats( # level별 episode_length랑, episode_reward를 저장해 둘 수 있는 list episode_lengths=[[] for i in range(21)], episode_rewards=[[] for i in range(21)]) ##### Open tensorflow session # config = tf.ConfigProto(allow_soft_placement=True) # with tf.Session(config=config) as sess: with tf.Session() as sess: # pdb.set_trace()