    def __init__(self):
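        # make sure the checkpoint directory exists before training starts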
        if not os.path.exists(CHECKPOINT_DIR):
            os.mkdir(CHECKPOINT_DIR)

        print("max time step: %d" % MAX_TIME_STEP)
        self.device = "/gpu:0" if USE_GPU else "/cpu:0"
        self.network_scope = TASK_TYPE
        self.list_of_tasks = TASK_LIST
        # materialize the scene list so it can be iterated more than once
        self.scene_scopes = list(self.list_of_tasks.keys())
        self.global_t = 0
        self.stop_requested = False

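        # draw the initial learning rate by interpolating the two bounds in
        # log space, so values spread evenly across orders of magnitude
        # (a sketch of log_uniform follows this constructor)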
        self.initial_learning_rate = self.log_uniform(LR_ALPHA_LOW,
                                                      LR_ALPHA_HIGH,
                                                      LR_ALPHA_LOG_RATE)

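        # the single global network shared by all worker threads: each
        # thread syncs from it and applies its gradients back to it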
        self.global_network = DRLNetwork(action_size=ACTION_SIZE,
                                         device=self.device,
                                         network_scope=self.network_scope,
                                         scene_scopes=self.scene_scopes)

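        # enumerate every (scene, target) pair; each pair is one task
        # branch that a worker thread can be assigned to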
        self.branches = []
        for scene in self.scene_scopes:
            for task in self.list_of_tasks[scene]:
                self.branches.append((scene, task))

        self.NUM_TASKS = len(self.branches)
        assert NUM_THREADS >= self.NUM_TASKS, \
            "Not enough threads for multitasking: at least {} threads needed.".format(self.NUM_TASKS)

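        # RMSPropApplier (defined elsewhere in this codebase) is assumed to
        # wrap a single RMSProp optimizer whose statistics are shared across
        # threads; the learning rate comes in through a placeholder so it
        # can be annealed over the course of training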
        self.learning_rate_input = tf.placeholder("float")
        self.grad_applier = RMSPropApplier(
            learning_rate=self.learning_rate_input,
            decay=RMSP_ALPHA,
            momentum=0.0,
            epsilon=RMSP_EPSILON,
            clip_norm=GRAD_NORM_CLIP,
            device=self.device)

        # instantiate the training threads: each thread trains one
        # (scene, target) branch, and branches are assigned round-robin
        # when NUM_THREADS exceeds the number of branches
        # (a hedged launch sketch follows this constructor)
        self.training_threads = []
        for i in range(NUM_THREADS):
            scene, task = self.branches[i % self.NUM_TASKS]
            training_thread = ADQN_Thread(i,
                                          self.global_network,
                                          self.initial_learning_rate,
                                          self.learning_rate_input,
                                          self.grad_applier,
                                          MAX_TIME_STEP,
                                          device=self.device,
                                          network_scope="thread-%d" % (i + 1),
                                          scene_scope=scene,
                                          task_scope=task)
            self.training_threads.append(training_thread)
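
    # log_uniform is called in __init__ but not defined in this section.
    # A minimal sketch of its assumed behavior (an assumption, not the
    # repo's confirmed implementation): interpolate between the two bounds
    # in log space, so learning rates spread evenly across orders of
    # magnitude. Assumes `import math` at module level.
    def log_uniform(self, lo, hi, rate):
        log_lo = math.log(lo)
        log_hi = math.log(hi)
        # rate = 0 returns lo, rate = 1 returns hi
        v = log_lo * (1 - rate) + log_hi * rate
        return math.exp(v)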
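
    # A hedged sketch of how the threads built above are typically driven;
    # the real entry point lives outside this section, and both the name
    # `train` and the `process(sess, global_t)` interface of ADQN_Thread
    # are assumptions made for illustration.
    def train(self, sess):
        import threading

        def worker(thread):
            # step this worker until the shared global counter is exhausted
            while self.global_t < MAX_TIME_STEP and not self.stop_requested:
                # assumed interface: process() runs one local rollout and
                # returns the number of global steps it consumed
                self.global_t += thread.process(sess, self.global_t)

        workers = [threading.Thread(target=worker, args=(t,))
                   for t in self.training_threads]
        for w in workers:
            w.start()
        for w in workers:
            w.join()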