Example #1
# valid_angles = list(range(5, 86, 5)) # from 5 to 85 degrees, in steps of 5 degrees
# valid_taptimes = list(range(500, 2501, 100))  # from 500 to 2500, in steps of 100
# valid_angles = [8, 10, 11, 14, 17, 18, 19, 20, 21, 22, 23, 26, 30, 31, 34, 35, 36, 46, 61, 65, 67, 70] # excluding 55
valid_angles = [
    8, 14, 17, 18, 19, 20, 21, 22, 23, 24, 26, 30, 31, 34, 35, 36, 40, 46, 50,
    55, 61, 65, 67, 70, 75
]
valid_taptimes = [
    600, 700, 900, 1000, 1100, 1200, 1300, 1500, 1600, 1700, 1800, 2000, 2500
]

# Create a global step variable
global_step = tf.Variable(0, name='global_step', trainable=False)

angle_estimator = q_network.DQN_Estimator(scope="angle_estimator",
                                          output_size=len(valid_angles),
                                          summaries_dir=SUMM_PATH)
angle_target_estimator = q_network.DQN_Estimator(
    scope="angle_target_estimator", output_size=len(valid_angles))
taptime_estimator = q_network.DQN_Estimator(scope="taptime_estimator",
                                            output_size=len(valid_taptimes),
                                            summaries_dir=SUMM_PATH,
                                            angle_as_state=len(valid_angles))
taptime_target_estimator = q_network.DQN_Estimator(
    scope="taptime_target_estimator",
    output_size=len(valid_taptimes),
    angle_as_state=len(valid_angles))
# angle_estimator, angle_target_estimator = DQN_Estimator(obs_size, sess, fe, sc_parser, "angle", valid_angles) # needs fixing
# taptime_estimator, taptime_target_estimator = DQN_Estimator(obs_size, sess, fe, sc_parser, "taptime", valid_taptimes) # needs fixing
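
# Hedged sketch (not part of the original snippet): in a standard DQN setup the
# target estimators are periodically synced to the online estimators by copying
# variables between scopes. A helper along these lines could be used, assuming
# each estimator builds its variables under the scope name passed above:
def copy_model_parameters(sess, src_scope, dst_scope):
    """Assign every trainable variable in src_scope to its counterpart in dst_scope."""
    src_vars = sorted((v for v in tf.trainable_variables() if v.name.startswith(src_scope)),
                      key=lambda v: v.name)
    dst_vars = sorted((v for v in tf.trainable_variables() if v.name.startswith(dst_scope)),
                      key=lambda v: v.name)
    sess.run([dst_var.assign(src_var) for src_var, dst_var in zip(src_vars, dst_vars)])

# Example usage once a session exists:
# copy_model_parameters(sess, "angle_estimator", "angle_target_estimator")
# copy_model_parameters(sess, "taptime_estimator", "taptime_target_estimator")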

# Keeps track of useful statistics
Example #2
# valid_angles = list(range(5, 86, 5)) # from 5 to 85 degrees, in steps of 5 degrees
# valid_angles = dqn_utils.get_valid_angles()
# valid_taptimes = list(range(500, 2501, 100))  # from 500 to 2500, in steps of 100
valid_angles = [8, 14, 17, 18, 19, 20, 21, 22, 23, 24, 26, 30, 31, 34, 35, 36, 40, 46, 50, 55, 61, 65, 67, 70, 75]
# [8, 10, 11, 14, 17, 18, 19, 20, 21, 22, 23, 26, 30, 31, 34, 35, 36, 46, 61, 65, 67, 70] # excluding 55
valid_taptimes = [600, 700, 900, 1000, 1100, 1200, 1300, 1500, 1600, 1700, 1800, 2000, 2500]

# Create a global step variable # starts at 0 for now; when a checkpoint is loaded, does the saved global_step get restored here?
global_step = tf.Variable(0, name='global_step', trainable=False)
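
# A minimal sketch answering the question above (assumes a standard
# tf.train.Saver workflow, which is not shown in this snippet; CHECKPOINT_DIR
# is a hypothetical path): global_step is an ordinary tf.Variable, so restoring
# a checkpoint also restores its saved value and training resumes from there.
#
# saver = tf.train.Saver()
# latest_checkpoint = tf.train.latest_checkpoint(CHECKPOINT_DIR)
# if latest_checkpoint:
#     saver.restore(sess, latest_checkpoint)  # restores global_step along with the weights
# print(sess.run(global_step))  # 0 on a fresh run, the saved step after a restore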

# initialize Q-network
if model_type == 1:
	estimator = q_network.DQN_Estimator(scope="estimator", angle_output_size=len(valid_angles), taptime_output_size=len(valid_taptimes))
	target_estimator = q_network.DQN_Estimator(scope="target_estimator", angle_output_size=len(valid_angles), taptime_output_size=len(valid_taptimes))
elif model_type == 2:
	angle_estimator = q_network_parallelNN.DQN_Estimator(scope="angle_estimator", output_size=len(valid_angles))
	angle_target_estimator = q_network_parallelNN.DQN_Estimator(scope="angle_target_estimator", output_size=len(valid_angles))
	taptime_estimator = q_network_parallelNN.DQN_Estimator(scope="taptime_estimator", output_size=len(valid_taptimes), angle_as_state=len(valid_angles))
	taptime_target_estimator = q_network_parallelNN.DQN_Estimator(scope="taptime_target_estimator", output_size=len(valid_taptimes), angle_as_state=len(valid_angles))

# Keeps track of useful statistics
EpisodeStats = namedtuple("Stats",["episode_lengths", "episode_rewards"])
stats = EpisodeStats( # lists that store episode_lengths and episode_rewards per level
        episode_lengths=[[] for i in range(21)],
        episode_rewards=[[] for i in range(21)])
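
# Usage sketch (assumption, not from this snippet): after each episode on a
# given level, the per-level lists would be appended to along these lines,
# where `level`, `t`, and `episode_reward` are hypothetical loop variables:
#
# stats.episode_lengths[level].append(t)
# stats.episode_rewards[level].append(episode_reward)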

##### Open tensorflow session
# config = tf.ConfigProto(allow_soft_placement=True)
# with tf.Session(config=config) as sess:
with tf.Session() as sess:
	# pdb.set_trace()