def __init__(self):
    if not os.path.exists(CHECKPOINT_DIR):
        os.mkdir(CHECKPOINT_DIR)
    # self.evaluation_gap = 10**6
    print(MAX_TIME_STEP)
    self.device = "/gpu:0" if USE_GPU else "/cpu:0"
    self.network_scope = TASK_TYPE
    self.list_of_tasks = TASK_LIST
    self.scene_scopes = self.list_of_tasks.keys()
    self.global_t = 0
    self.stop_requested = False
    # pick the initial learning rate by log-space interpolation between the two bounds
    self.initial_learning_rate = self.log_uniform(LR_ALPHA_LOW,
                                                  LR_ALPHA_HIGH,
                                                  LR_ALPHA_LOG_RATE)
    # shared global network; every worker thread syncs from it and applies gradients to it
    self.global_network = DRLNetwork(action_size=ACTION_SIZE,
                                     device=self.device,
                                     network_scope=self.network_scope,
                                     scene_scopes=self.scene_scopes)
    # enumerate every (scene, target) pair as one training branch
    self.branches = []
    for scene in self.scene_scopes:
        for task in self.list_of_tasks[scene]:
            self.branches.append((scene, task))
    self.NUM_TASKS = len(self.branches)
    assert NUM_THREADS >= self.NUM_TASKS, \
        "Not enough threads for multitasking: at least {} threads needed.".format(self.NUM_TASKS)

    self.learning_rate_input = tf.placeholder("float")
    self.grad_applier = RMSPropApplier(learning_rate=self.learning_rate_input,
                                       decay=RMSP_ALPHA,
                                       momentum=0.0,
                                       epsilon=RMSP_EPSILON,
                                       clip_norm=GRAD_NORM_CLIP,
                                       device=self.device)

    # instantiate each training thread;
    # each thread is training for one target in one scene
    self.training_threads = []
    for i in range(NUM_THREADS):
        # assign branches to threads round-robin so every task gets at least one worker
        scene, task = self.branches[i % self.NUM_TASKS]
        training_thread = ADQN_Thread(i, self.global_network,
                                      self.initial_learning_rate,
                                      self.learning_rate_input,
                                      self.grad_applier, MAX_TIME_STEP,
                                      device=self.device,
                                      network_scope="thread-%d" % (i + 1),
                                      scene_scope=scene,
                                      task_scope=task)
        self.training_threads.append(training_thread)
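# `log_uniform` is called above but not defined in this excerpt. A minimal
# sketch, assuming the conventional A3C-style helper that interpolates between
# the two bounds in log space (`rate` in [0, 1]; needs `import math` at the
# module top) -- an assumption, not confirmed by this file:
def log_uniform(self, lo, hi, rate):
    # interpolate between log(lo) and log(hi), then map back with exp,
    # so the result is spaced log-uniformly rather than linearly
    log_lo = math.log(lo)
    log_hi = math.log(hi)
    return math.exp(log_lo * (1.0 - rate) + log_hi * rate)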
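# The constructor only builds the workers; a driver loop is still needed to
# run them. A minimal sketch, assuming a TF session `self.sess` is created
# elsewhere and that each worker exposes a `process(sess, global_t)` step
# method returning the number of environment steps taken (both names are
# assumptions, not confirmed by this excerpt; `threading` would be imported
# at the module top):
def train(self):
    def run_worker(worker):
        # advance this worker until the global step budget is exhausted
        # or a stop has been requested (e.g. from a signal handler)
        while self.global_t < MAX_TIME_STEP and not self.stop_requested:
            self.global_t += worker.process(self.sess, self.global_t)

    threads = [threading.Thread(target=run_worker, args=(w,))
               for w in self.training_threads]
    for t in threads:
        t.start()
    for t in threads:
        t.join()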