def __init__(self, roidb, num_loaders=4, minibatch_queue_size=64,
             blobs_queue_capacity=8):
    self._roidb = roidb
    self._lock = threading.Lock()
    self._perm = deque(range(len(self._roidb)))
    self._cur = 0  # _perm cursor
    # The minibatch queue holds prepared training data in host (CPU) memory
    # When training with N > 1 GPUs, each element in the minibatch queue
    # is actually a partial minibatch which contributes 1 / N of the
    # examples to the overall minibatch
    self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
    self._blobs_queue_capacity = blobs_queue_capacity
    # Random queue name in case one instantiates multiple RoIDataLoaders
    self._loader_id = uuid.uuid4()
    self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
    # Loader threads construct (partial) minibatches and put them on the
    # minibatch queue
    self._num_loaders = num_loaders
    self._num_gpus = cfg.NUM_GPUS
    self.coordinator = Coordinator()
    if cfg.WEBLY.WEBLY_ON and cfg.WEBLY.BAGGING_MIXUP:
        # Build a {gt_class: [roidb indices]} map for bagging/mixup sampling
        self._class2idx = {}
        for im_i, entry in enumerate(self._roidb):
            if im_i % 1000 == 0:
                logger.info(' {:d}/{:d}'.format(im_i, len(self._roidb)))
            gt_inds = np.where(entry['gt_classes'] > 0)[0]
            # assert len(gt_inds) == 1, \
            #     'Only one ground truth for image is allowed.'
            gt_classes = entry['gt_classes'][gt_inds].copy()
            if gt_classes[0] not in self._class2idx:
                self._class2idx[gt_classes[0]] = []
            self._class2idx[gt_classes[0]].append(im_i)
    self._output_names = get_minibatch_blob_names()
    self._shuffle_roidb_inds()
    self.create_threads()
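# Illustrative sketch: how a class -> roidb-index map like `_class2idx` above
# can drive partner selection for bagging/mixup. `sample_mixup_partner` is a
# hypothetical helper, not part of this loader; it assumes only the
# {gt_class: [roidb indices]} mapping built in __init__.
import random

def sample_mixup_partner(class2idx, gt_class, exclude_idx):
    """Pick a different roidb index with the same gt class, if one exists."""
    candidates = [i for i in class2idx.get(gt_class, []) if i != exclude_idx]
    return random.choice(candidates) if candidates else None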
class RoIDataLoader(object):
    def __init__(
        self,
        roidb,
        num_loaders=4,
        minibatch_queue_size=64,
        blobs_queue_capacity=8
    ):
        self._roidb = roidb
        self._lock = threading.Lock()
        self._perm = deque(range(len(self._roidb)))
        self._cur = 0  # _perm cursor
        # The minibatch queue holds prepared training data in host (CPU) memory
        # When training with N > 1 GPUs, each element in the minibatch queue
        # is actually a partial minibatch which contributes 1 / N of the
        # examples to the overall minibatch
        self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
        self._blobs_queue_capacity = blobs_queue_capacity
        # Random queue name in case one instantiates multiple RoIDataLoaders
        self._loader_id = uuid.uuid4()
        self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
        # Loader threads construct (partial) minibatches and put them on the
        # minibatch queue
        self._num_loaders = num_loaders
        self._num_gpus = cfg.NUM_GPUS
        self.coordinator = Coordinator()
        if cfg.REID.TRIPLET_LOSS:
            self._get_roidb_gt()
            self._P = cfg.REID.P
            self._K = cfg.REID.K
            if cfg.REID.TRIPLET_LOSS_CROSS:
                self._num_loaders = 1
                self._cur_iter = 0
                self._cur_gpu = 0
        self._output_names = get_minibatch_blob_names()
        self._shuffle_roidb_inds()
        self.create_threads()

    def _get_roidb_gt(self):
        """Build a {class: [roidb indices]} map for identity sampling."""
        self._class2idx = {}
        for im_i, entry in enumerate(self._roidb):
            if im_i % 1000 == 0:
                logger.info(' {:d}/{:d}'.format(im_i, len(self._roidb)))
            gt_inds = np.where(entry['gt_classes'] > 0)[0]
            assert len(gt_inds) == 1, \
                'Only one ground truth for image is allowed.'
            gt_classes = entry['gt_classes'][gt_inds].copy()
            if gt_classes[0] - 1 not in self._class2idx:
                self._class2idx[gt_classes[0] - 1] = []
            self._class2idx[gt_classes[0] - 1].append(im_i)
        self._num_classes = len(self._class2idx.keys())
        self._class = []

    def set_start_iter(self, start_iter):
        self._cur_iter = start_iter

    def _update_cur_iter(self):
        with self._lock:
            self._cur_gpu = self._cur_gpu + 1
            if self._cur_gpu < cfg.NUM_GPUS:
                return
            self._cur_iter = self._cur_iter + 1
            self._cur_gpu = 0

    def get_num_iter_per_epoch_triplet(self):
        return int(self._num_classes / self._P / cfg.NUM_GPUS)

    def get_num_iter_per_epoch(self):
        if cfg.TRAIN.USE_FLIPPED:
            return int(
                len(self._roidb) / cfg.TRAIN.IMS_PER_BATCH / cfg.NUM_GPUS / 2
            )
        else:
            return int(
                len(self._roidb) / cfg.TRAIN.IMS_PER_BATCH / cfg.NUM_GPUS
            )

    def minibatch_loader_thread(self):
        """Load mini-batches and put them onto the mini-batch queue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                blobs = self.get_next_minibatch()
                # Blobs must be queued in the order specified by
                # self.get_output_names
                ordered_blobs = OrderedDict()
                for key in self.get_output_names():
                    assert blobs[key].dtype in (np.int32, np.float32), \
                        'Blob {} of dtype {} must have dtype of ' \
                        'np.int32 or np.float32'.format(key, blobs[key].dtype)
                    ordered_blobs[key] = blobs[key]
                coordinated_put(
                    self.coordinator, self._minibatch_queue, ordered_blobs
                )
                if cfg.REID.TRIPLET_LOSS and cfg.REID.TRIPLET_LOSS_CROSS:
                    self._update_cur_iter()
            logger.info('Stopping mini-batch loading thread')

    def enqueue_blobs_thread(self, gpu_id, blob_names):
        """Transfer mini-batches from a mini-batch queue to a BlobsQueue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                if self._minibatch_queue.qsize() == 0:
                    logger.warning('Mini-batch queue is empty')
                blobs = coordinated_get(self.coordinator, self._minibatch_queue)
                self.enqueue_blobs(gpu_id, blob_names, blobs.values())
                logger.debug(
                    'batch queue size {}'.format(self._minibatch_queue.qsize())
                )
            logger.info('Stopping enqueue thread')

    def get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch. Thread safe."""
        valid = False
        while not valid:
            db_inds = self._get_next_minibatch_inds()
            minibatch_db = [self._roidb[i] for i in db_inds]
            blobs, valid = get_minibatch(minibatch_db)
        return blobs

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb. Not thread safe."""
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            horz_inds = np.random.permutation(horz_inds)
            vert_inds = np.random.permutation(vert_inds)
            mb = cfg.TRAIN.IMS_PER_BATCH
            horz_inds = horz_inds[:(len(horz_inds) // mb) * mb]
            vert_inds = vert_inds[:(len(vert_inds) // mb) * mb]
            inds = np.hstack((horz_inds, vert_inds))
            inds = np.reshape(inds, (-1, mb))
            row_perm = np.random.permutation(np.arange(inds.shape[0]))
            inds = np.reshape(inds[row_perm, :], (-1, ))
            self._perm = inds
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._perm = deque(self._perm)
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch. Thread safe."""
        if cfg.REID.TRIPLET_LOSS and cfg.REID.TRIPLET_LOSS_CROSS:
            cur_ep = int(self._cur_iter / self.get_num_iter_per_epoch())
            if cur_ep > cfg.REID.TRIPLET_LOSS_START and cur_ep % 2 == 1:
                if (self._cur_iter % self.get_num_iter_per_epoch() >
                        self.get_num_iter_per_epoch_triplet()):
                    # Skip the remaining triplet iterations of this epoch,
                    # then fall through to the standard sampling path
                    while (self._cur_iter % self.get_num_iter_per_epoch() >
                           self.get_num_iter_per_epoch_triplet()):
                        self._update_cur_iter()
                else:
                    return self._get_next_minibatch_inds_triplet_loss()
        elif cfg.REID.TRIPLET_LOSS:
            return self._get_next_minibatch_inds_triplet_loss()
        with self._lock:
            # We use a deque and always take the *first* IMS_PER_BATCH items
            # followed by *rotating* the deque so that we see fresh items
            # each time. If the length of _perm is not divisible by
            # IMS_PER_BATCH, then we end up wrapping around the permutation.
            db_inds = [self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)]
            self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH)
            self._cur += cfg.TRAIN.IMS_PER_BATCH
            if self._cur >= len(self._perm):
                self._shuffle_roidb_inds()
            return db_inds

    def _get_next_minibatch_inds_triplet_loss(self):
        with self._lock:
            if len(self._class) < self._P:
                self._class = list(self._class2idx.keys())
                random.shuffle(self._class)
            db_inds = []
            for p in range(self._P):
                key = self._class.pop()
                population = self._class2idx[key]
                if len(population) < self._K:
                    population = population * self._K
                im_idx = random.sample(population, self._K)
                db_inds.extend(im_idx)
            return db_inds

    def get_output_names(self):
        return self._output_names

    def enqueue_blobs(self, gpu_id, blob_names, blobs):
        """Put a mini-batch on a BlobsQueue."""
        assert len(blob_names) == len(blobs)
        t = time.time()
        dev = c2_utils.CudaDevice(gpu_id)
        queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
        blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
        for (blob_name, blob) in zip(blob_names, blobs):
            workspace.FeedBlob(blob_name, blob, device_option=dev)
        logger.debug(
            'enqueue_blobs {}: workspace.FeedBlob: {}'.
            format(gpu_id, time.time() - t)
        )
        t = time.time()
        op = core.CreateOperator(
            'SafeEnqueueBlobs', [queue_name] + blob_names,
            blob_names + [queue_name + '_enqueue_status'],
            device_option=dev
        )
        workspace.RunOperatorOnce(op)
        logger.debug(
            'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
            format(gpu_id, time.time() - t)
        )

    def create_threads(self):
        # Create mini-batch loader threads, each of which builds mini-batches
        # and places them into a queue in CPU memory
        self._workers = [
            threading.Thread(target=self.minibatch_loader_thread)
            for _ in range(self._num_loaders)
        ]
        # Create one BlobsQueue per GPU
        # (enqueue_blob_names are unscoped)
        enqueue_blob_names = self.create_blobs_queues()
        # Create one enqueuer thread per GPU
        self._enqueuers = [
            threading.Thread(
                target=self.enqueue_blobs_thread,
                args=(gpu_id, enqueue_blob_names)
            ) for gpu_id in range(self._num_gpus)
        ]

    def start(self, prefill=False):
        for w in self._workers + self._enqueuers:
            w.setDaemon(True)
            w.start()
        if prefill:
            logger.info('Pre-filling mini-batch queue...')
            while not self._minibatch_queue.full():
                logger.info(
                    ' [{:d}/{:d}]'.format(
                        self._minibatch_queue.qsize(),
                        self._minibatch_queue.maxsize
                    )
                )
                time.sleep(0.1)
                # Detect failure and shutdown
                if self.coordinator.should_stop():
                    self.shutdown()
                    break

    def has_stopped(self):
        return self.coordinator.should_stop()

    def shutdown(self):
        self.coordinator.request_stop()
        self.coordinator.wait_for_stop()
        self.close_blobs_queues()
        for w in self._workers + self._enqueuers:
            w.join()

    def create_blobs_queues(self):
        """Create one BlobsQueue for each GPU to hold mini-batches."""
        for gpu_id in range(self._num_gpus):
            with c2_utils.GpuNameScope(gpu_id):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CreateBlobsQueue', [], [self._blobs_queue_name],
                        num_blobs=len(self.get_output_names()),
                        capacity=self._blobs_queue_capacity
                    )
                )
        return self.create_enqueue_blobs()

    def close_blobs_queues(self):
        """Close a BlobsQueue."""
        for gpu_id in range(self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CloseBlobsQueue', [self._blobs_queue_name], []
                    )
                )

    def create_enqueue_blobs(self):
        blob_names = self.get_output_names()
        enqueue_blob_names = [
            '{}_enqueue_{}'.format(b, self._loader_id) for b in blob_names
        ]
        for gpu_id in range(self._num_gpus):
            with c2_utils.NamedCudaScope(gpu_id):
                for blob in enqueue_blob_names:
                    workspace.CreateBlob(core.ScopedName(blob))
        return enqueue_blob_names

    def register_sigint_handler(self):
        def signal_handler(signal, frame):
            logger.info(
                'SIGINT: Shutting down RoIDataLoader threads and exiting...'
            )
            self.shutdown()
        signal.signal(signal.SIGINT, signal_handler)
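# Minimal standalone sketch of the P x K sampling scheme implemented by
# _get_next_minibatch_inds_triplet_loss above: each batch takes P identities
# and K images per identity, oversampling identities that have fewer than K
# images. `class2idx` is assumed to be the {class: [roidb indices]} map built
# by _get_roidb_gt; unlike the loader, this sketch samples identities fresh on
# every call rather than cycling through a shuffled pool.
import random

def sample_pk_batch(class2idx, P, K):
    classes = random.sample(list(class2idx.keys()), P)  # requires P <= #classes
    batch = []
    for c in classes:
        population = class2idx[c]
        if len(population) < K:
            population = population * K  # oversample small identities
        batch.extend(random.sample(population, K))
    return batch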
class RoIDataLoader(object):
    def __init__(
        self,
        roidb,
        num_loaders=4,
        minibatch_queue_size=64,
        blobs_queue_capacity=8
    ):
        self._roidb = roidb
        self._lock = threading.Lock()
        self._perm = deque(range(len(self._roidb)))
        self._cur = 0  # _perm cursor
        # The minibatch queue holds prepared training data in host (CPU) memory
        # When training with N > 1 GPUs, each element in the minibatch queue
        # is actually a partial minibatch which contributes 1 / N of the
        # examples to the overall minibatch
        self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
        self._blobs_queue_capacity = blobs_queue_capacity
        # Random queue name in case one instantiates multiple RoIDataLoaders
        self._loader_id = uuid.uuid4()
        self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
        # Loader threads construct (partial) minibatches and put them on the
        # minibatch queue
        self._num_loaders = num_loaders
        self._num_gpus = cfg.NUM_GPUS
        self.coordinator = Coordinator()
        self._output_names = get_minibatch_blob_names()
        self._shuffle_roidb_inds()
        self.create_threads()

    def minibatch_loader_thread(self):
        """Load mini-batches and put them onto the mini-batch queue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                blobs = self.get_next_minibatch()
                # Blobs must be queued in the order specified by
                # self.get_output_names
                ordered_blobs = OrderedDict()
                for key in self.get_output_names():
                    assert blobs[key].dtype in (np.int32, np.float32), \
                        'Blob {} of dtype {} must have dtype of ' \
                        'np.int32 or np.float32'.format(key, blobs[key].dtype)
                    ordered_blobs[key] = blobs[key]
                coordinated_put(
                    self.coordinator, self._minibatch_queue, ordered_blobs
                )
            logger.info('Stopping mini-batch loading thread')

    def enqueue_blobs_thread(self, gpu_id, blob_names):
        """Transfer mini-batches from a mini-batch queue to a BlobsQueue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                if self._minibatch_queue.qsize() == 0:
                    logger.warning('Mini-batch queue is empty')
                blobs = coordinated_get(self.coordinator, self._minibatch_queue)
                self.enqueue_blobs(gpu_id, blob_names, blobs.values())
                logger.debug(
                    'batch queue size {}'.format(self._minibatch_queue.qsize())
                )
            logger.info('Stopping enqueue thread')

    def get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch. Thread safe."""
        valid = False
        while not valid:
            db_inds = self._get_next_minibatch_inds()
            minibatch_db = [self._roidb[i] for i in db_inds]
            blobs, valid = get_minibatch(minibatch_db)
        return blobs

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb. Not thread safe."""
        if cfg.MODEL.TRACKING_ON:
            # Ensure that individual sequences don't get mixed up
            roidbs = [
                [el for el in self._roidb if el['dataset'].name == name]
                for name in cfg.TRAIN.DATASETS
            ]
            perms = [None] * len(roidbs)
            for i, roidb in enumerate(roidbs):
                n_idx = len(roidb)
                # Sample from a window of `TRCNN.FRAME_DIST_MAX` seconds
                # which is equal to `delta_frames / framerate`
                delta_frames = int(
                    cfg.TRCNN.FRAME_DIST_MAX * float(
                        roidb[0]['dataset'].COCO.dataset['info']['frame_rate']
                    )
                )
                perms[i] = np.random.permutation(np.arange(len(roidb)))
                perm_one = perms[i] + np.random.randint(
                    delta_frames + 1, size=n_idx
                )
                # Clamp paired indices to the end of the sequence
                perm_one = np.array(
                    [idx if idx < n_idx else n_idx - 1 for idx in perm_one]
                )
                perm_two = perms[i].copy()
                off = sum([len(_roidb) for _roidb in roidbs[:i]])
                perm_one += off
                perm_two += off
                perms[i] = list(zip(perm_one, perm_two))
            # Shuffle the frame pairs, then flatten them back into a single
            # index sequence so paired frames stay adjacent
            self._perm = [item for sublist in perms for item in sublist]
            np.random.shuffle(self._perm)
            self._perm = [item for tup in self._perm for item in tup]
        elif cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            horz_inds = np.random.permutation(horz_inds)
            vert_inds = np.random.permutation(vert_inds)
            mb = cfg.TRAIN.IMS_PER_BATCH
            horz_inds = horz_inds[:(len(horz_inds) // mb) * mb]
            vert_inds = vert_inds[:(len(vert_inds) // mb) * mb]
            inds = np.hstack((horz_inds, vert_inds))
            inds = np.reshape(inds, (-1, mb))
            row_perm = np.random.permutation(np.arange(inds.shape[0]))
            inds = np.reshape(inds[row_perm, :], (-1, ))
            self._perm = inds
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._perm = deque(self._perm)
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch. Thread safe."""
        with self._lock:
            # We use a deque and always take the *first* IMS_PER_BATCH items
            # followed by *rotating* the deque so that we see fresh items
            # each time. If the length of _perm is not divisible by
            # IMS_PER_BATCH, then we end up wrapping around the permutation.
            db_inds = [self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)]
            self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH)
            self._cur += cfg.TRAIN.IMS_PER_BATCH
            if self._cur >= len(self._perm):
                self._shuffle_roidb_inds()
            return db_inds

    def get_output_names(self):
        return self._output_names

    def enqueue_blobs(self, gpu_id, blob_names, blobs):
        """Put a mini-batch on a BlobsQueue."""
        assert len(blob_names) == len(blobs)
        t = time.time()
        dev = c2_utils.CudaDevice(gpu_id)
        queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
        blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
        for (blob_name, blob) in zip(blob_names, blobs):
            workspace.FeedBlob(blob_name, blob, device_option=dev)
        logger.debug(
            'enqueue_blobs {}: workspace.FeedBlob: {}'.
            format(gpu_id, time.time() - t)
        )
        t = time.time()
        op = core.CreateOperator(
            'SafeEnqueueBlobs', [queue_name] + blob_names,
            blob_names + [queue_name + '_enqueue_status'],
            device_option=dev
        )
        workspace.RunOperatorOnce(op)
        logger.debug(
            'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
            format(gpu_id, time.time() - t)
        )

    def create_threads(self):
        # Create mini-batch loader threads, each of which builds mini-batches
        # and places them into a queue in CPU memory
        self._workers = [
            threading.Thread(target=self.minibatch_loader_thread)
            for _ in range(self._num_loaders)
        ]
        # Create one BlobsQueue per GPU
        # (enqueue_blob_names are unscoped)
        enqueue_blob_names = self.create_blobs_queues()
        # Create one enqueuer thread per GPU
        self._enqueuers = [
            threading.Thread(
                target=self.enqueue_blobs_thread,
                args=(gpu_id, enqueue_blob_names)
            ) for gpu_id in range(self._num_gpus)
        ]

    def start(self, prefill=False):
        for w in self._workers + self._enqueuers:
            w.setDaemon(True)
            w.start()
        if prefill:
            logger.info('Pre-filling mini-batch queue...')
            while not self._minibatch_queue.full():
                logger.info(
                    ' [{:d}/{:d}]'.format(
                        self._minibatch_queue.qsize(),
                        self._minibatch_queue.maxsize
                    )
                )
                time.sleep(0.1)
                # Detect failure and shutdown
                if self.coordinator.should_stop():
                    self.shutdown()
                    break

    def has_stopped(self):
        return self.coordinator.should_stop()

    def shutdown(self):
        self.coordinator.request_stop()
        self.coordinator.wait_for_stop()
        self.close_blobs_queues()
        for w in self._workers + self._enqueuers:
            w.join()

    def create_blobs_queues(self):
        """Create one BlobsQueue for each GPU to hold mini-batches."""
        for gpu_id in range(self._num_gpus):
            with c2_utils.GpuNameScope(gpu_id):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CreateBlobsQueue', [], [self._blobs_queue_name],
                        num_blobs=len(self.get_output_names()),
                        capacity=self._blobs_queue_capacity
                    )
                )
        return self.create_enqueue_blobs()

    def close_blobs_queues(self):
        """Close a BlobsQueue."""
        for gpu_id in range(self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CloseBlobsQueue', [self._blobs_queue_name], []
                    )
                )

    def create_enqueue_blobs(self):
        blob_names = self.get_output_names()
        enqueue_blob_names = [
            '{}_enqueue_{}'.format(b, self._loader_id) for b in blob_names
        ]
        for gpu_id in range(self._num_gpus):
            with c2_utils.NamedCudaScope(gpu_id):
                for blob in enqueue_blob_names:
                    workspace.CreateBlob(core.ScopedName(blob))
        return enqueue_blob_names

    def register_sigint_handler(self):
        def signal_handler(signal, frame):
            logger.info(
                'SIGINT: Shutting down RoIDataLoader threads and exiting...'
            )
            self.shutdown()
        signal.signal(signal.SIGINT, signal_handler)
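# Minimal sketch of the frame-pair sampling used by the TRACKING_ON branch of
# _shuffle_roidb_inds above: each frame i is paired with a second frame drawn
# from a window of up to `delta_frames` later frames, clamped to the sequence
# end. Assumes the frames of one sequence are stored contiguously.
import numpy as np

def sample_frame_pairs(n_frames, delta_frames):
    first = np.random.permutation(n_frames)
    second = first + np.random.randint(delta_frames + 1, size=n_frames)
    second = np.minimum(second, n_frames - 1)  # clamp to the last frame
    return list(zip(second, first))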
class RoIDataLoader(object):
    def __init__(
        self,
        roidb,
        num_loaders=4,
        minibatch_queue_size=64,
        blobs_queue_capacity=8
    ):
        self._roidb = roidb
        self._lock = threading.Lock()
        self._perm = deque(range(len(self._roidb)))
        self._cur = 0  # _perm cursor
        # The minibatch queue holds prepared training data in host (CPU) memory
        # When training with N > 1 GPUs, each element in the minibatch queue
        # is actually a partial minibatch which contributes 1 / N of the
        # examples to the overall minibatch
        self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
        self._blobs_queue_capacity = blobs_queue_capacity
        # Random queue name in case one instantiates multiple RoIDataLoaders
        self._loader_id = uuid.uuid4()
        self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
        # Loader threads construct (partial) minibatches and put them on the
        # minibatch queue
        self._num_loaders = num_loaders
        self._num_gpus = cfg.NUM_GPUS
        self.coordinator = Coordinator()
        self._output_names = get_minibatch_blob_names()
        self._shuffle_roidb_inds()
        self.create_threads()

    def minibatch_loader_thread(self):
        """Load mini-batches and put them onto the mini-batch queue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                blobs = self.get_next_minibatch()
                # Blobs must be queued in the order specified by
                # self.get_output_names
                ordered_blobs = OrderedDict()
                for key in self.get_output_names():
                    assert blobs[key].dtype in (np.int32, np.float32), \
                        'Blob {} of dtype {} must have dtype of ' \
                        'np.int32 or np.float32'.format(key, blobs[key].dtype)
                    ordered_blobs[key] = blobs[key]
                coordinated_put(
                    self.coordinator, self._minibatch_queue, ordered_blobs
                )
            logger.info('Stopping mini-batch loading thread')

    def enqueue_blobs_thread(self, gpu_id, blob_names):
        """Transfer mini-batches from a mini-batch queue to a BlobsQueue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                if self._minibatch_queue.qsize() == 0:
                    logger.warning('Mini-batch queue is empty')
                blobs = coordinated_get(self.coordinator, self._minibatch_queue)
                self.enqueue_blobs(gpu_id, blob_names, blobs.values())
                logger.debug(
                    'batch queue size {}'.format(self._minibatch_queue.qsize())
                )
            logger.info('Stopping enqueue thread')

    def get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch. Thread safe."""
        valid = False
        while not valid:
            db_inds = self._get_next_minibatch_inds()
            minibatch_db = [self._roidb[i] for i in db_inds]
            roidb_noclass = dict()
            if S6:  # for S5
                # Locate the dedicated no-class template entry by its
                # hard-coded image path
                for roidb_i, roidb_list in enumerate(self._roidb):
                    if roidb_list[u'image'] == u'/home/icubic/daily_work/code/Detectron/detectron/datasets/data/aoi/data_all/test.png':
                        roidb_noclass = self._roidb[roidb_i].copy()
                        if 0:
                            if len(roidb_list[u'gt_classes']) == 1:
                                roidb_noclass['1'] = self._roidb[roidb_i]
                            if len(roidb_list[u'gt_classes']) == 2:
                                roidb_noclass['2'] = self._roidb[roidb_i]
                            if (len(roidb_list[u'gt_classes']) == 3 and
                                    roidb_list[u'gt_classes'][0] == 2):
                                roidb_noclass['3'] = self._roidb[roidb_i]
                            if len(roidb_list[u'gt_classes']) > 3:
                                print('more')
            if S6:
                blobs, valid = get_minibatch_s6(minibatch_db, roidb_noclass)
            else:
                blobs, valid = get_minibatch(minibatch_db)
        return blobs

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb. Not thread safe."""
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            horz_inds = np.random.permutation(horz_inds)
            vert_inds = np.random.permutation(vert_inds)
            mb = cfg.TRAIN.IMS_PER_BATCH
            horz_inds = horz_inds[:(len(horz_inds) // mb) * mb]
            vert_inds = vert_inds[:(len(vert_inds) // mb) * mb]
            inds = np.hstack((horz_inds, vert_inds))
            inds = np.reshape(inds, (-1, mb))
            row_perm = np.random.permutation(np.arange(inds.shape[0]))
            inds = np.reshape(inds[row_perm, :], (-1, ))
            self._perm = inds
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._perm = deque(self._perm)
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch. Thread safe."""
        with self._lock:
            # We use a deque and always take the *first* IMS_PER_BATCH items
            # followed by *rotating* the deque so that we see fresh items
            # each time. If the length of _perm is not divisible by
            # IMS_PER_BATCH, then we end up wrapping around the permutation.
            db_inds = [self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)]
            self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH)
            self._cur += cfg.TRAIN.IMS_PER_BATCH
            if self._cur >= len(self._perm):
                self._shuffle_roidb_inds()
            return db_inds

    def _get_next_minibatch_inds_s6(self):
        """Return the roidb indices for the next minibatch. Thread safe.
        Identical to _get_next_minibatch_inds; kept for the S6 code path."""
        with self._lock:
            # We use a deque and always take the *first* IMS_PER_BATCH items
            # followed by *rotating* the deque so that we see fresh items
            # each time. If the length of _perm is not divisible by
            # IMS_PER_BATCH, then we end up wrapping around the permutation.
            db_inds = [self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)]
            self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH)
            self._cur += cfg.TRAIN.IMS_PER_BATCH
            if self._cur >= len(self._perm):
                self._shuffle_roidb_inds()
            return db_inds

    def get_output_names(self):
        return self._output_names

    def enqueue_blobs(self, gpu_id, blob_names, blobs):
        """Put a mini-batch on a BlobsQueue."""
        assert len(blob_names) == len(blobs)
        t = time.time()
        dev = c2_utils.CudaDevice(gpu_id)
        queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
        blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
        for (blob_name, blob) in zip(blob_names, blobs):
            workspace.FeedBlob(blob_name, blob, device_option=dev)
        logger.debug(
            'enqueue_blobs {}: workspace.FeedBlob: {}'.
            format(gpu_id, time.time() - t)
        )
        t = time.time()
        op = core.CreateOperator(
            'SafeEnqueueBlobs', [queue_name] + blob_names,
            blob_names + [queue_name + '_enqueue_status'],
            device_option=dev
        )
        workspace.RunOperatorOnce(op)
        logger.debug(
            'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
            format(gpu_id, time.time() - t)
        )

    def create_threads(self):
        # Create mini-batch loader threads, each of which builds mini-batches
        # and places them into a queue in CPU memory
        self._workers = [
            threading.Thread(target=self.minibatch_loader_thread)
            for _ in range(self._num_loaders)
        ]
        # Create one BlobsQueue per GPU
        # (enqueue_blob_names are unscoped)
        enqueue_blob_names = self.create_blobs_queues()
        # Create one enqueuer thread per GPU
        self._enqueuers = [
            threading.Thread(
                target=self.enqueue_blobs_thread,
                args=(gpu_id, enqueue_blob_names)
            ) for gpu_id in range(self._num_gpus)
        ]

    def start(self, prefill=False):
        for w in self._workers + self._enqueuers:
            w.start()
        if prefill:
            logger.info('Pre-filling mini-batch queue...')
            while not self._minibatch_queue.full():
                logger.info(
                    ' [{:d}/{:d}]'.format(
                        self._minibatch_queue.qsize(),
                        self._minibatch_queue.maxsize
                    )
                )
                time.sleep(0.1)
                # Detect failure and shutdown
                if self.coordinator.should_stop():
                    self.shutdown()
                    break

    def shutdown(self):
        self.coordinator.request_stop()
        self.coordinator.wait_for_stop()
        self.close_blobs_queues()
        for w in self._workers + self._enqueuers:
            w.join()

    def create_blobs_queues(self):
        """Create one BlobsQueue for each GPU to hold mini-batches."""
        for gpu_id in range(self._num_gpus):
            with c2_utils.GpuNameScope(gpu_id):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CreateBlobsQueue', [], [self._blobs_queue_name],
                        num_blobs=len(self.get_output_names()),
                        capacity=self._blobs_queue_capacity
                    )
                )
        return self.create_enqueue_blobs()

    def close_blobs_queues(self):
        """Close a BlobsQueue."""
        for gpu_id in range(self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CloseBlobsQueue', [self._blobs_queue_name], []
                    )
                )

    def create_enqueue_blobs(self):
        blob_names = self.get_output_names()
        enqueue_blob_names = [
            '{}_enqueue_{}'.format(b, self._loader_id) for b in blob_names
        ]
        for gpu_id in range(self._num_gpus):
            with c2_utils.NamedCudaScope(gpu_id):
                for blob in enqueue_blob_names:
                    workspace.CreateBlob(core.ScopedName(blob))
        return enqueue_blob_names

    def register_sigint_handler(self):
        def signal_handler(signal, frame):
            logger.info(
                'SIGINT: Shutting down RoIDataLoader threads and exiting...'
            )
            self.shutdown()
        signal.signal(signal.SIGINT, signal_handler)
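# Sketch of the template lookup performed inside get_next_minibatch above,
# hoisted out of the per-minibatch loop: scanning the whole roidb for the
# fixed no-class template image once is enough, since the roidb does not
# change during training. `template_path` stands for the hard-coded path used
# above; the roidb entry schema is assumed to match.
def find_template_entry(roidb, template_path):
    for entry in roidb:
        if entry[u'image'] == template_path:
            return entry.copy()
    return dict()  # fall back to an empty template, as the loop above does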
class RoIDataLoader(object):
    def __init__(
        self,
        roidb,
        num_loaders=4,
        minibatch_queue_size=64,
        blobs_queue_capacity=8
    ):
        self._roidb = roidb
        self._lock = threading.Lock()
        self._perm = deque(range(len(self._roidb)))
        self._cur = 0  # _perm cursor
        # The minibatch queue holds prepared training data in host (CPU) memory
        # When training with N > 1 GPUs, each element in the minibatch queue
        # is actually a partial minibatch which contributes 1 / N of the
        # examples to the overall minibatch
        self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
        self._blobs_queue_capacity = blobs_queue_capacity
        # Random queue name in case one instantiates multiple RoIDataLoaders
        self._loader_id = uuid.uuid4()
        self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
        # Loader threads construct (partial) minibatches and put them on the
        # minibatch queue
        self._num_loaders = num_loaders
        self._num_gpus = cfg.NUM_GPUS
        self.coordinator = Coordinator()
        self._output_names = get_minibatch_blob_names()
        self._shuffle_roidb_inds()
        self.create_threads()

    def minibatch_loader_thread(self):
        """Load mini-batches and put them onto the mini-batch queue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                blobs = self.get_next_minibatch()
                # Blobs must be queued in the order specified by
                # self.get_output_names
                ordered_blobs = OrderedDict()
                for key in self.get_output_names():
                    assert blobs[key].dtype in (np.int32, np.float32), \
                        'Blob {} of dtype {} must have dtype of ' \
                        'np.int32 or np.float32'.format(key, blobs[key].dtype)
                    ordered_blobs[key] = blobs[key]
                coordinated_put(
                    self.coordinator, self._minibatch_queue, ordered_blobs
                )
            logger.info('Stopping mini-batch loading thread')

    def enqueue_blobs_thread(self, gpu_id, blob_names):
        """Transfer mini-batches from a mini-batch queue to a BlobsQueue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                if self._minibatch_queue.qsize() == 0:
                    logger.warning('Mini-batch queue is empty')
                blobs = coordinated_get(self.coordinator, self._minibatch_queue)
                self.enqueue_blobs(gpu_id, blob_names, blobs.values())
                logger.debug(
                    'batch queue size {}'.format(self._minibatch_queue.qsize())
                )
            logger.info('Stopping enqueue thread')

    def get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch. Thread safe."""
        valid = False
        while not valid:
            db_inds = self._get_next_minibatch_inds()
            minibatch_db = [self._roidb[i] for i in db_inds]
            blobs, valid = get_minibatch(minibatch_db)
        return blobs

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb. Not thread safe."""
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            horz_inds = np.random.permutation(horz_inds)
            vert_inds = np.random.permutation(vert_inds)
            mb = cfg.TRAIN.IMS_PER_BATCH
            horz_inds = horz_inds[:(len(horz_inds) // mb) * mb]
            vert_inds = vert_inds[:(len(vert_inds) // mb) * mb]
            inds = np.hstack((horz_inds, vert_inds))
            inds = np.reshape(inds, (-1, mb))
            row_perm = np.random.permutation(np.arange(inds.shape[0]))
            inds = np.reshape(inds[row_perm, :], (-1, ))
            self._perm = inds
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._perm = deque(self._perm)
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch. Thread safe."""
        with self._lock:
            # We use a deque and always take the *first* IMS_PER_BATCH items
            # followed by *rotating* the deque so that we see fresh items
            # each time. If the length of _perm is not divisible by
            # IMS_PER_BATCH, then we end up wrapping around the permutation.
            db_inds = [self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)]
            self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH)
            self._cur += cfg.TRAIN.IMS_PER_BATCH
            if self._cur >= len(self._perm):
                self._shuffle_roidb_inds()
            return db_inds

    def get_output_names(self):
        return self._output_names

    def enqueue_blobs(self, gpu_id, blob_names, blobs):
        """Put a mini-batch on a BlobsQueue."""
        assert len(blob_names) == len(blobs)
        t = time.time()
        dev = c2_utils.CudaDevice(gpu_id)
        queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
        blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
        for (blob_name, blob) in zip(blob_names, blobs):
            workspace.FeedBlob(blob_name, blob, device_option=dev)
        logger.debug(
            'enqueue_blobs {}: workspace.FeedBlob: {}'.
            format(gpu_id, time.time() - t)
        )
        t = time.time()
        op = core.CreateOperator(
            'SafeEnqueueBlobs', [queue_name] + blob_names,
            blob_names + [queue_name + '_enqueue_status'],
            device_option=dev
        )
        workspace.RunOperatorOnce(op)
        logger.debug(
            'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
            format(gpu_id, time.time() - t)
        )

    def create_threads(self):
        # Create mini-batch loader threads, each of which builds mini-batches
        # and places them into a queue in CPU memory
        self._workers = [
            threading.Thread(target=self.minibatch_loader_thread)
            for _ in range(self._num_loaders)
        ]
        # Create one BlobsQueue per GPU
        # (enqueue_blob_names are unscoped)
        enqueue_blob_names = self.create_blobs_queues()
        # Create one enqueuer thread per GPU
        self._enqueuers = [
            threading.Thread(
                target=self.enqueue_blobs_thread,
                args=(gpu_id, enqueue_blob_names)
            ) for gpu_id in range(self._num_gpus)
        ]

    def start(self, prefill=False):
        for w in self._workers + self._enqueuers:
            w.start()
        if prefill:
            logger.info('Pre-filling mini-batch queue...')
            while not self._minibatch_queue.full():
                logger.info(
                    ' [{:d}/{:d}]'.format(
                        self._minibatch_queue.qsize(),
                        self._minibatch_queue.maxsize
                    )
                )
                time.sleep(0.1)
                # Detect failure and shutdown
                if self.coordinator.should_stop():
                    self.shutdown()
                    break

    def shutdown(self):
        self.coordinator.request_stop()
        self.coordinator.wait_for_stop()
        self.close_blobs_queues()
        for w in self._workers + self._enqueuers:
            w.join()

    def create_blobs_queues(self):
        """Create one BlobsQueue for each GPU to hold mini-batches."""
        for gpu_id in range(self._num_gpus):
            with c2_utils.GpuNameScope(gpu_id):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CreateBlobsQueue', [], [self._blobs_queue_name],
                        num_blobs=len(self.get_output_names()),
                        capacity=self._blobs_queue_capacity
                    )
                )
        return self.create_enqueue_blobs()

    def close_blobs_queues(self):
        """Close a BlobsQueue."""
        for gpu_id in range(self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CloseBlobsQueue', [self._blobs_queue_name], []
                    )
                )

    def create_enqueue_blobs(self):
        blob_names = self.get_output_names()
        enqueue_blob_names = [
            '{}_enqueue_{}'.format(b, self._loader_id) for b in blob_names
        ]
        for gpu_id in range(self._num_gpus):
            with c2_utils.NamedCudaScope(gpu_id):
                for blob in enqueue_blob_names:
                    workspace.CreateBlob(core.ScopedName(blob))
        return enqueue_blob_names

    def register_sigint_handler(self):
        def signal_handler(signal, frame):
            logger.info(
                'SIGINT: Shutting down RoIDataLoader threads and exiting...'
            )
            self.shutdown()
        signal.signal(signal.SIGINT, signal_handler)
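# Minimal sketch of the deque-rotation sampling used by
# _get_next_minibatch_inds above: take the first `batch_size` items, then
# rotate so the next call sees fresh items; when the permutation length is
# not divisible by the batch size, a batch simply wraps around.
from collections import deque

perm = deque(range(5))
for _ in range(3):
    batch = [perm[i] for i in range(2)]
    perm.rotate(-2)
    print(batch)  # [0, 1], then [2, 3], then [4, 0] (wrap-around)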
class RoIDataLoader(object):
    def __init__(
        self,
        roidb,
        num_loaders=4,
        minibatch_queue_size=64,
        blobs_queue_capacity=8
    ):
        self._mc = pylibmc.Client(
            ['127.0.0.1:11212'], binary=True,
            behaviors={'tcp_nodelay': True, 'ketama': True}
        )
        # Payload slots shared with a peer process through memcached, plus a
        # '<key>_s' status flag per slot: 'yidu' (read/consumed) vs. 'weidu'
        # (unread). All flags start as consumed and all payloads start empty.
        shared_keys = ['rois', 'inds', 'freeze_fastrcnn_label']
        for lvl in range(2, 7):
            shared_keys.append('rpn_cls_probs_fpn{}'.format(lvl))
            shared_keys.append('rpn_bbox_pred_fpn{}'.format(lvl))
        for key in shared_keys:
            self._mc.set(key + '_s', 'yidu')
            self._mc.set(key, [])
        self._roidb = roidb
        self._lock = threading.Lock()
        self._perm = deque(range(len(self._roidb)))
        self._cur = 0  # _perm cursor
        # The minibatch queue holds prepared training data in host (CPU) memory
        # When training with N > 1 GPUs, each element in the minibatch queue
        # is actually a partial minibatch which contributes 1 / N of the
        # examples to the overall minibatch
        self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
        self._blobs_queue_capacity = blobs_queue_capacity
        # Random queue name in case one instantiates multiple RoIDataLoaders
        self._loader_id = uuid.uuid4()
        self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
        # Loader threads construct (partial) minibatches and put them on the
        # minibatch queue
        self._num_loaders = num_loaders
        self._num_gpus = cfg.NUM_GPUS
        self.coordinator = Coordinator()
        self._output_names = get_minibatch_blob_names()
        self._inds = []
        self._shuffle_roidb_inds0()
        self._shuffle_roidb_inds()
        self.create_threads()

    def minibatch_loader_thread(self):
        """Load mini-batches and put them onto the mini-batch queue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                blobs = self.get_next_minibatch()
                # Blobs must be queued in the order specified by
                # self.get_output_names
                ordered_blobs = OrderedDict()
                for key in self.get_output_names():
                    assert blobs[key].dtype in (np.int32, np.float32), \
                        'Blob {} of dtype {} must have dtype of ' \
                        'np.int32 or np.float32'.format(key, blobs[key].dtype)
                    ordered_blobs[key] = blobs[key]
                coordinated_put(
                    self.coordinator, self._minibatch_queue, ordered_blobs
                )
            logger.info('Stopping mini-batch loading thread')

    def enqueue_blobs_thread(self, gpu_id, blob_names):
        """Transfer mini-batches from a mini-batch queue to a BlobsQueue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                if self._minibatch_queue.qsize() == 0:
                    logger.warning('Mini-batch queue is empty')
                blobs = coordinated_get(self.coordinator, self._minibatch_queue)
                self.enqueue_blobs(gpu_id, blob_names, blobs.values())
                logger.debug(
                    'batch queue size {}'.format(self._minibatch_queue.qsize())
                )
            logger.info('Stopping enqueue thread')

    def get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch. Thread safe."""
        valid = False
        while not valid:
            db_inds = self._get_next_minibatch_inds()
            minibatch_db = [self._roidb[i] for i in db_inds]
            blobs, valid = get_minibatch(minibatch_db)
        return blobs

    def _shuffle_roidb_inds0(self):
        """Randomly permute the training roidb and publish the resulting index
        order through memcached. Not thread safe."""
        widths = np.array([r['width'] for r in self._roidb])
        heights = np.array([r['height'] for r in self._roidb])
        horz = (widths >= heights)
        vert = np.logical_not(horz)
        horz_inds = np.where(horz)[0]
        vert_inds = np.where(vert)[0]
        horz_inds = np.random.permutation(horz_inds)
        vert_inds = np.random.permutation(vert_inds)
        mb = cfg.TRAIN.IMS_PER_BATCH
        horz_inds = horz_inds[:(len(horz_inds) // mb) * mb]
        vert_inds = vert_inds[:(len(vert_inds) // mb) * mb]
        inds = np.hstack((horz_inds, vert_inds))
        inds = np.reshape(inds, (-1, mb))
        # Row order is deliberately left unshuffled so both processes agree
        row_perm = np.arange(inds.shape[0])
        inds = np.reshape(inds[row_perm, :], (-1, ))
        # Spin until the peer has consumed the previous order ('yidu' = read),
        # then publish the new one and mark it unread ('weidu')
        while True:
            if self._mc.get('inds_s') == 'yidu':
                break
        self._mc.replace('inds', inds)
        self._mc.replace('inds_s', 'weidu')
        self._inds = inds

    def _shuffle_roidb_inds(self):
        """Adopt the most recently published index order. Not thread safe."""
        self._perm = self._inds
        self._perm = deque(self._perm)
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch. Thread safe."""
        with self._lock:
            # We use a deque and always take the *first* IMS_PER_BATCH items
            # followed by *rotating* the deque so that we see fresh items
            # each time. If the length of _perm is not divisible by
            # IMS_PER_BATCH, then we end up wrapping around the permutation.
            db_inds = [self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)]
            self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH)
            self._cur += cfg.TRAIN.IMS_PER_BATCH
            # Publish the next epoch's order slightly ahead of the wrap-around
            # so the peer process can pick it up in time
            if (self._cur + 100 >= len(self._perm) and
                    self._cur + 100 - cfg.TRAIN.IMS_PER_BATCH < len(self._perm)):
                self._shuffle_roidb_inds0()
            if self._cur >= len(self._perm):
                self._shuffle_roidb_inds()
            return db_inds

    def get_output_names(self):
        return self._output_names

    def enqueue_blobs(self, gpu_id, blob_names, blobs):
        """Put a mini-batch on a BlobsQueue."""
        assert len(blob_names) == len(blobs)
        t = time.time()
        dev = c2_utils.CudaDevice(gpu_id)
        queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
        blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
        for (blob_name, blob) in zip(blob_names, blobs):
            workspace.FeedBlob(blob_name, blob, device_option=dev)
        logger.debug(
            'enqueue_blobs {}: workspace.FeedBlob: {}'.
            format(gpu_id, time.time() - t)
        )
        t = time.time()
        op = core.CreateOperator(
            'SafeEnqueueBlobs', [queue_name] + blob_names,
            blob_names + [queue_name + '_enqueue_status'],
            device_option=dev
        )
        workspace.RunOperatorOnce(op)
        logger.debug(
            'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
            format(gpu_id, time.time() - t)
        )

    def create_threads(self):
        # Create mini-batch loader threads, each of which builds mini-batches
        # and places them into a queue in CPU memory
        self._workers = [
            threading.Thread(target=self.minibatch_loader_thread)
            for _ in range(self._num_loaders)
        ]
        # Create one BlobsQueue per GPU
        # (enqueue_blob_names are unscoped)
        enqueue_blob_names = self.create_blobs_queues()
        # Create one enqueuer thread per GPU
        self._enqueuers = [
            threading.Thread(
                target=self.enqueue_blobs_thread,
                args=(gpu_id, enqueue_blob_names)
            ) for gpu_id in range(self._num_gpus)
        ]

    def start(self, prefill=False):
        for w in self._workers + self._enqueuers:
            w.setDaemon(True)
            w.start()
        if prefill:
            logger.info('Pre-filling mini-batch queue...')
            while not self._minibatch_queue.full():
                logger.info(
                    ' [{:d}/{:d}]'.format(
                        self._minibatch_queue.qsize(),
                        self._minibatch_queue.maxsize
                    )
                )
                time.sleep(0.1)
                # Detect failure and shutdown
                if self.coordinator.should_stop():
                    self.shutdown()
                    break

    def has_stopped(self):
        return self.coordinator.should_stop()

    def shutdown(self):
        self.coordinator.request_stop()
        self.coordinator.wait_for_stop()
        self.close_blobs_queues()
        for w in self._workers + self._enqueuers:
            w.join()

    def create_blobs_queues(self):
        """Create one BlobsQueue for each GPU to hold mini-batches."""
        for gpu_id in range(self._num_gpus):
            with c2_utils.GpuNameScope(gpu_id):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CreateBlobsQueue', [], [self._blobs_queue_name],
                        num_blobs=len(self.get_output_names()),
                        capacity=self._blobs_queue_capacity
                    )
                )
        return self.create_enqueue_blobs()

    def close_blobs_queues(self):
        """Close a BlobsQueue."""
        for gpu_id in range(self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CloseBlobsQueue', [self._blobs_queue_name], []
                    )
                )

    def create_enqueue_blobs(self):
        blob_names = self.get_output_names()
        enqueue_blob_names = [
            '{}_enqueue_{}'.format(b, self._loader_id) for b in blob_names
        ]
        for gpu_id in range(self._num_gpus):
            with c2_utils.NamedCudaScope(gpu_id):
                for blob in enqueue_blob_names:
                    workspace.CreateBlob(core.ScopedName(blob))
        return enqueue_blob_names

    def register_sigint_handler(self):
        def signal_handler(signal, frame):
            logger.info(
                'SIGINT: Shutting down RoIDataLoader threads and exiting...'
            )
            self.shutdown()
        signal.signal(signal.SIGINT, signal_handler)
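# Sketch of the two-process handshake the memcached flags above implement:
# the producer waits for the '<key>_s' flag to read 'yidu' (consumed), writes
# the payload, and flips the flag to 'weidu' (unread); the consumer does the
# inverse. Assumes a reachable memcached at the address used in __init__.
import pylibmc

def publish(mc, key, value):
    while mc.get(key + '_s') != 'yidu':  # spin until the peer has consumed
        pass
    mc.replace(key, value)
    mc.replace(key + '_s', 'weidu')

def consume(mc, key):
    while mc.get(key + '_s') != 'weidu':  # spin until a fresh value arrives
        pass
    value = mc.get(key)
    mc.replace(key + '_s', 'yidu')
    return value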
class RoIDataLoader(object):
    def __init__(self, roidb, num_loaders=4, minibatch_queue_size=64,
                 blobs_queue_capacity=8):
        self._roidb = roidb
        self._lock = threading.Lock()
        self._perm = deque(range(len(self._roidb)))
        self._cur = 0  # _perm cursor
        # The minibatch queue holds prepared training data in host (CPU) memory
        # When training with N > 1 GPUs, each element in the minibatch queue
        # is actually a partial minibatch which contributes 1 / N of the
        # examples to the overall minibatch
        self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
        self._blobs_queue_capacity = blobs_queue_capacity
        # Random queue name in case one instantiates multiple RoIDataLoaders
        self._loader_id = uuid.uuid4()
        self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
        # Loader threads construct (partial) minibatches and put them on the
        # minibatch queue
        self._num_loaders = num_loaders
        self._num_gpus = cfg.NUM_GPUS
        self.coordinator = Coordinator()
        self._output_names = get_minibatch_blob_names()
        self._shuffle_roidb_inds()
        self.create_threads()

    def minibatch_loader_thread(self):
        """Load mini-batches and put them onto the mini-batch queue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                blobs = self.get_next_minibatch()
                # Blobs must be queued in the order specified by
                # self.get_output_names
                ordered_blobs = OrderedDict()
                for key in self.get_output_names():
                    assert blobs[key].dtype in (np.int32, np.float32), \
                        'Blob {} of dtype {} must have dtype of ' \
                        'np.int32 or np.float32'.format(key, blobs[key].dtype)
                    ordered_blobs[key] = blobs[key]
                coordinated_put(
                    self.coordinator, self._minibatch_queue, ordered_blobs
                )
            logger.info('Stopping mini-batch loading thread')

    def enqueue_blobs_thread(self, gpu_id, blob_names):
        """Transfer mini-batches from a mini-batch queue to a BlobsQueue."""
        with self.coordinator.stop_on_exception():
            while not self.coordinator.should_stop():
                if self._minibatch_queue.qsize() == 0:
                    logger.warning('Mini-batch queue is empty')
                blobs = coordinated_get(self.coordinator, self._minibatch_queue)
                self.enqueue_blobs(gpu_id, blob_names, blobs.values())
                logger.debug(
                    'batch queue size {}'.format(self._minibatch_queue.qsize())
                )
            logger.info('Stopping enqueue thread')

    def get_next_minibatch(self):
        """Return the blobs to be used for the next minibatch. Thread safe."""
        valid = False
        while not valid:
            db_inds = self._get_next_minibatch_inds()
            minibatch_db = [self._roidb[i] for i in db_inds]
            blobs, valid = get_minibatch(minibatch_db)
            # Debug visualization (disabled): draw the transformed gt boxes on
            # the minibatch image and dump it to OUTPUT_DIR.
            # img = np.asarray([blobs['data'][0][2], blobs['data'][0][1],
            #                   blobs['data'][0][0]]).astype('uint8')[0]
            # matrix = blobs['im_tr_matrix']
            # scale = blobs['im_info'][0][2]
            # for gt_roi in minibatch_db[0]['boxes']:
            #     w, h = gt_roi[2] - gt_roi[0], gt_roi[3] - gt_roi[1]
            #     nw, nh = int(w * scale), int(h * scale)
            #     center_x, center_y = gt_roi[0] + w / 2, gt_roi[1] + h / 2
            #     new_center = np.dot(
            #         matrix, [[center_x], [center_y], [1.0]]).astype('int')
            #     new_center_x = int(new_center[0][0])
            #     new_center_y = int(new_center[1][0])
            #     nbx = int(new_center_x - nw / 2)
            #     nby = int(new_center_y - nh / 2)
            #     nbx2 = int(nbx + nw)
            #     nby2 = int(nby + nh)
            #     cv2.rectangle(img, (nbx, nby), (nbx2, nby2), (255, 0, 0), 2)
            # if cv2.imwrite(os.path.join(
            #         cfg.OUTPUT_DIR, str(minibatch_db[0]['id']) + '.png'), img):
            #     printed = 1
            # else:
            #     printed = 0
        return blobs

    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb. Not thread safe."""
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            horz_inds = np.random.permutation(horz_inds)
            vert_inds = np.random.permutation(vert_inds)
            mb = cfg.TRAIN.IMS_PER_BATCH
            horz_inds = horz_inds[:(len(horz_inds) // mb) * mb]
            vert_inds = vert_inds[:(len(vert_inds) // mb) * mb]
            inds = np.hstack((horz_inds, vert_inds))
            inds = np.reshape(inds, (-1, mb))
            row_perm = np.random.permutation(np.arange(inds.shape[0]))
            inds = np.reshape(inds[row_perm, :], (-1, ))
            self._perm = inds
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._perm = deque(self._perm)
        self._cur = 0

    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch. Thread safe."""
        with self._lock:
            # We use a deque and always take the *first* IMS_PER_BATCH items
            # followed by *rotating* the deque so that we see fresh items
            # each time. If the length of _perm is not divisible by
            # IMS_PER_BATCH, then we end up wrapping around the permutation.
            db_inds = [self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)]
            self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH)
            self._cur += cfg.TRAIN.IMS_PER_BATCH
            if self._cur >= len(self._perm):
                self._shuffle_roidb_inds()
            return db_inds

    def get_output_names(self):
        return self._output_names

    def enqueue_blobs(self, gpu_id, blob_names, blobs):
        """Put a mini-batch on a BlobsQueue."""
        assert len(blob_names) == len(blobs)
        t = time.time()
        dev = c2_utils.CudaDevice(gpu_id)
        queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
        blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
        for (blob_name, blob) in zip(blob_names, blobs):
            workspace.FeedBlob(blob_name, blob, device_option=dev)
        logger.debug(
            'enqueue_blobs {}: workspace.FeedBlob: {}'.
            format(gpu_id, time.time() - t)
        )
        t = time.time()
        op = core.CreateOperator(
            'SafeEnqueueBlobs', [queue_name] + blob_names,
            blob_names + [queue_name + '_enqueue_status'],
            device_option=dev
        )
        workspace.RunOperatorOnce(op)
        logger.debug(
            'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.
            format(gpu_id, time.time() - t)
        )

    def create_threads(self):
        # Create mini-batch loader threads, each of which builds mini-batches
        # and places them into a queue in CPU memory
        self._workers = [
            threading.Thread(target=self.minibatch_loader_thread)
            for _ in range(self._num_loaders)
        ]
        # Create one BlobsQueue per GPU
        # (enqueue_blob_names are unscoped)
        enqueue_blob_names = self.create_blobs_queues()
        # Create one enqueuer thread per GPU
        self._enqueuers = [
            threading.Thread(
                target=self.enqueue_blobs_thread,
                args=(gpu_id, enqueue_blob_names)
            ) for gpu_id in range(self._num_gpus)
        ]

    def start(self, prefill=False):
        for w in self._workers + self._enqueuers:
            w.setDaemon(True)
            w.start()
        if prefill:
            logger.info('Pre-filling mini-batch queue...')
            while not self._minibatch_queue.full():
                logger.info(
                    ' [{:d}/{:d}]'.format(
                        self._minibatch_queue.qsize(),
                        self._minibatch_queue.maxsize
                    )
                )
                time.sleep(0.1)
                # Detect failure and shutdown
                if self.coordinator.should_stop():
                    self.shutdown()
                    break

    def has_stopped(self):
        return self.coordinator.should_stop()

    def shutdown(self):
        self.coordinator.request_stop()
        self.coordinator.wait_for_stop()
        self.close_blobs_queues()
        for w in self._workers + self._enqueuers:
            w.join()

    def create_blobs_queues(self):
        """Create one BlobsQueue for each GPU to hold mini-batches."""
        for gpu_id in range(self._num_gpus):
            with c2_utils.GpuNameScope(gpu_id):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CreateBlobsQueue', [], [self._blobs_queue_name],
                        num_blobs=len(self.get_output_names()),
                        capacity=self._blobs_queue_capacity
                    )
                )
        return self.create_enqueue_blobs()

    def close_blobs_queues(self):
        """Close a BlobsQueue."""
        for gpu_id in range(self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CloseBlobsQueue', [self._blobs_queue_name], []
                    )
                )

    def create_enqueue_blobs(self):
        blob_names = self.get_output_names()
        enqueue_blob_names = [
            '{}_enqueue_{}'.format(b, self._loader_id) for b in blob_names
        ]
        for gpu_id in range(self._num_gpus):
            with c2_utils.NamedCudaScope(gpu_id):
                for blob in enqueue_blob_names:
                    workspace.CreateBlob(core.ScopedName(blob))
        return enqueue_blob_names

    def register_sigint_handler(self):
        def signal_handler(signal, frame):
            logger.info(
                'SIGINT: Shutting down RoIDataLoader threads and exiting...'
            )
            self.shutdown()
        signal.signal(signal.SIGINT, signal_handler)
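# Typical lifecycle for this loader, as a minimal sketch: it assumes a
# prepared Detectron roidb and an initialized Caffe2 workspace, with `roidb`
# standing in for the real dataset.
loader = RoIDataLoader(roidb, num_loaders=4)
loader.register_sigint_handler()
loader.start(prefill=True)  # threads fill the queues before training begins
# ... run training, dequeuing from the per-GPU BlobsQueues ...
loader.shutdown()           # stop all threads and close the BlobsQueues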
class RoIDataLoader(object): def __init__(self, source_roidb, target_roidb=None, num_loaders=4, minibatch_queue_size=64, blobs_queue_capacity=8): self._roidb = source_roidb self._target_roidb = target_roidb self._lock = threading.Lock() self._perm = deque(np.random.permutation(len(self._roidb))) if target_roidb != None: self._target_perm = deque( np.random.permutation(len(self._target_roidb))) self._cur = 0 # _perm cursor self._target_cur = 0 # _target_perm cursor # The minibatch queue holds prepared training data in host (CPU) memory # When training with N > 1 GPUs, each element in the minibatch queue # is actually a partial minibatch which contributes 1 / N of the # examples to the overall minibatch self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size) self._blobs_queue_capacity = blobs_queue_capacity # Random queue name in case one instantiates multple RoIDataLoaders self._loader_id = uuid.uuid4() self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id) # Loader threads construct (partial) minibatches and put them on the # minibatch queue self._num_loaders = num_loaders self._num_gpus = cfg.NUM_GPUS self.coordinator = Coordinator() self._output_names = get_minibatch_blob_names() self._shuffle_roidb_inds() self.create_threads() def minibatch_loader_thread(self): """Load mini-batches and put them onto the mini-batch queue.""" with self.coordinator.stop_on_exception(): while not self.coordinator.should_stop(): blobs = self.get_next_minibatch() # Blobs must be queued in the order specified by # self.get_output_names ordered_blobs = OrderedDict() for key in self.get_output_names(): assert blobs[key].dtype in (np.int32, np.float32, np.bool_), \ 'Blob {} of dtype {} must have dtype of ' \ 'np.int32 or np.float32 or np.bool_'.format(key, blobs[key].dtype) ordered_blobs[key] = blobs[key] coordinated_put(self.coordinator, self._minibatch_queue, ordered_blobs) logger.info('Stopping mini-batch loading thread') def enqueue_blobs_thread(self, gpu_id, blob_names): """Transfer mini-batches from a mini-batch queue to a BlobsQueue.""" with self.coordinator.stop_on_exception(): while not self.coordinator.should_stop(): if self._minibatch_queue.qsize == 0: logger.warning('Mini-batch queue is empty') blobs = coordinated_get(self.coordinator, self._minibatch_queue) self.enqueue_blobs(gpu_id, blob_names, blobs.values()) logger.debug('batch queue size {}'.format( self._minibatch_queue.qsize())) logger.info('Stopping enqueue thread') def get_next_minibatch(self): """Return the blobs to be used for the next minibatch. Thread safe.""" valid = False while not valid: db_inds, db_target_inds = self._get_next_minibatch_inds() minibatch_db = [self._roidb[i] for i in db_inds] if db_target_inds != None: minibatch_db += [self._target_roidb[i] for i in db_target_inds] blobs, valid = get_minibatch(minibatch_db) return blobs def _shuffle_roidb_inds(self): """Randomly permute the training roidb. 
    def _shuffle_roidb_inds(self):
        """Randomly permute the training roidb. Not thread safe."""
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._roidb])
            heights = np.array([r['height'] for r in self._roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            horz_inds = np.random.permutation(horz_inds)
            vert_inds = np.random.permutation(vert_inds)
            mb = cfg.TRAIN.IMS_PER_BATCH
            if self._target_roidb is not None:
                # Source images fill only the part of the batch not taken
                # by target images
                mb = cfg.TRAIN.IMS_PER_BATCH - cfg.TRAIN.IMS_PER_BATCH // 2
            horz_inds = horz_inds[:(len(horz_inds) // mb) * mb]
            vert_inds = vert_inds[:(len(vert_inds) // mb) * mb]
            inds = np.hstack((horz_inds, vert_inds))
            inds = np.reshape(inds, (-1, mb))
            row_perm = np.random.permutation(np.arange(inds.shape[0]))
            inds = np.reshape(inds[row_perm, :], (-1, ))
            self._perm = inds
        else:
            self._perm = np.random.permutation(np.arange(len(self._roidb)))
        self._perm = deque(self._perm)
        self._cur = 0

    def _shuffle_target_roidb_inds(self):
        """Randomly permute the target training roidb. Not thread safe."""
        if cfg.TRAIN.ASPECT_GROUPING:
            widths = np.array([r['width'] for r in self._target_roidb])
            heights = np.array([r['height'] for r in self._target_roidb])
            horz = (widths >= heights)
            vert = np.logical_not(horz)
            horz_inds = np.where(horz)[0]
            vert_inds = np.where(vert)[0]
            horz_inds = np.random.permutation(horz_inds)
            vert_inds = np.random.permutation(vert_inds)
            mb = cfg.TRAIN.IMS_PER_BATCH // 2
            horz_inds = horz_inds[:(len(horz_inds) // mb) * mb]
            vert_inds = vert_inds[:(len(vert_inds) // mb) * mb]
            inds = np.hstack((horz_inds, vert_inds))
            inds = np.reshape(inds, (-1, mb))
            row_perm = np.random.permutation(np.arange(inds.shape[0]))
            inds = np.reshape(inds[row_perm, :], (-1, ))
            self._target_perm = inds
        else:
            self._target_perm = np.random.permutation(
                np.arange(len(self._target_roidb))
            )
        self._target_perm = deque(self._target_perm)
        self._target_cur = 0
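
    # Worked example (not in the original) of the ASPECT_GROUPING logic
    # above: with mb = 2 and, say, 5 horizontal and 3 vertical images,
    # horz_inds is truncated to 4 items and vert_inds to 2, so every row
    # of the (-1, mb) reshape holds images of a single orientation:
    #
    #   horz: [7, 2, 5, 0]   vert: [6, 3]
    #   inds -> [[7, 2], [5, 0], [6, 3]]  (rows shuffled, then flattened)
    #
    # This guarantees that images batched together share an orientation and
    # therefore pad efficiently to a common shape, at the cost of dropping
    # up to mb - 1 images per orientation per shuffle.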
    def _get_next_minibatch_inds(self):
        """Return the roidb indices for the next minibatch. Thread safe."""
        with self._lock:
            # We use a deque and always take the *first* IMS_PER_BATCH items
            # followed by *rotating* the deque so that we see fresh items
            # each time. If the length of _perm is not divisible by
            # IMS_PER_BATCH, then we end up wrapping around the permutation.
            if self._target_roidb is None:
                db_inds = [
                    self._perm[i] for i in range(cfg.TRAIN.IMS_PER_BATCH)
                ]
                self._perm.rotate(-cfg.TRAIN.IMS_PER_BATCH)
                self._cur += cfg.TRAIN.IMS_PER_BATCH
                if self._cur >= len(self._perm):
                    self._shuffle_roidb_inds()
                db_target_inds = None
            else:
                # Split the batch between domains: the target domain gets
                # IMS_PER_BATCH // 2 images and the source domain gets the
                # remainder
                db_inds = [
                    self._perm[i]
                    for i in range(
                        cfg.TRAIN.IMS_PER_BATCH - cfg.TRAIN.IMS_PER_BATCH // 2
                    )
                ]
                db_target_inds = [
                    self._target_perm[i]
                    for i in range(cfg.TRAIN.IMS_PER_BATCH // 2)
                ]
                self._perm.rotate(-len(db_inds))
                self._target_perm.rotate(-len(db_target_inds))
                self._cur += len(db_inds)
                self._target_cur += len(db_target_inds)
                if self._cur >= len(self._perm):
                    self._shuffle_roidb_inds()
                if self._target_cur >= len(self._target_perm):
                    self._shuffle_target_roidb_inds()
            return db_inds, db_target_inds

    def get_perm_state(self, iters_done):
        state = {}
        with self._lock:
            perm = self._perm
            cur = self._cur
            if self._target_roidb is None:
                ims_per_batch = cfg.TRAIN.IMS_PER_BATCH
            else:
                ims_per_batch = (
                    cfg.TRAIN.IMS_PER_BATCH - cfg.TRAIN.IMS_PER_BATCH // 2
                )
            batches_per_roidb = (
                (len(self._roidb) + ims_per_batch - 1) // ims_per_batch
            )
            actual_cur = (iters_done % batches_per_roidb) * ims_per_batch
            # Undo images already sitting in the minibatch queue and the
            # BlobsQueue so that a restore resumes at the first unconsumed
            # image
            mb_qsize = max(0, cur - actual_cur)
            perm.rotate(mb_qsize)
            cur = actual_cur
            state['roidb_order'] = np.array([cur] + list(perm), dtype=np.int32)
            if self._target_roidb is not None:
                perm = self._target_perm
                cur = self._target_cur
                ims_per_batch = cfg.TRAIN.IMS_PER_BATCH // 2
                # The wrap-around length for the target permutation is based
                # on the target roidb (the original used len(self._roidb)
                # here, which looks like a copy-paste slip)
                batches_per_roidb = (
                    (len(self._target_roidb) + ims_per_batch - 1) //
                    ims_per_batch
                )
                actual_cur = (iters_done % batches_per_roidb) * ims_per_batch
                mb_qsize = max(0, cur - actual_cur)
                perm.rotate(mb_qsize)
                cur = actual_cur
                state['target_roidb_order'] = np.array(
                    [cur] + list(perm), dtype=np.int32
                )
        return state

    def set_perm_state(self, state):
        order = state['roidb_order'] if isinstance(state, dict) else state
        cur = order[0]
        perm = order[1:]
        if len(perm) == len(self._perm):
            with self._lock:
                # Queue.empty() only *reports* emptiness; drain the queue
                # explicitly to discard stale prefetched minibatches
                while not self._minibatch_queue.empty():
                    try:
                        self._minibatch_queue.get_nowait()
                    except Queue.Empty:
                        break
                self._perm = deque(perm)
                self._cur = cur
                if self._target_roidb is not None and \
                        isinstance(state, dict) and \
                        'target_roidb_order' in state:
                    order = state['target_roidb_order']
                    self._target_cur = order[0]
                    self._target_perm = deque(order[1:])
                    logger.info('roidb target perm state loaded')
            logger.info('roidb perm state loaded')
        else:
            logger.info('roidb state not loaded, different size train set.')

    def get_output_names(self):
        return self._output_names

    def enqueue_blobs(self, gpu_id, blob_names, blobs):
        """Put a mini-batch on a BlobsQueue."""
        assert len(blob_names) == len(blobs)
        t = time.time()
        dev = c2_utils.CudaDevice(gpu_id)
        queue_name = 'gpu_{}/{}'.format(gpu_id, self._blobs_queue_name)
        blob_names = ['gpu_{}/{}'.format(gpu_id, b) for b in blob_names]
        for (blob_name, blob) in zip(blob_names, blobs):
            workspace.FeedBlob(blob_name, blob, device_option=dev)
        logger.debug(
            'enqueue_blobs {}: workspace.FeedBlob: {}'.format(
                gpu_id, time.time() - t
            )
        )
        t = time.time()
        op = core.CreateOperator(
            'SafeEnqueueBlobs', [queue_name] + blob_names,
            blob_names + [queue_name + '_enqueue_status'],
            device_option=dev
        )
        workspace.RunOperatorOnce(op)
        logger.debug(
            'enqueue_blobs {}: workspace.RunOperatorOnce: {}'.format(
                gpu_id, time.time() - t
            )
        )
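
    # Checkpointing sketch (not in the original): get_perm_state and
    # set_perm_state above are intended to round-trip through a training
    # checkpoint. The file name below is illustrative, not an existing
    # Detectron convention:
    #
    #   state = loader.get_perm_state(iters_done=cur_iter)
    #   np.save('loader_state.npy', state['roidb_order'])
    #   ...
    #   loader.set_perm_state({'roidb_order': np.load('loader_state.npy')})
    #
    # get_perm_state rewinds the permutation cursor by however many images
    # were prefetched into the queues but not yet consumed by the nets, so
    # the restored loader resumes exactly where training left off.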
    def create_threads(self):
        # Create mini-batch loader threads, each of which builds mini-batches
        # and places them into a queue in CPU memory
        self._workers = [
            threading.Thread(target=self.minibatch_loader_thread)
            for _ in range(self._num_loaders)
        ]
        # Create one BlobsQueue per GPU
        # (enqueue_blob_names are unscoped)
        enqueue_blob_names = self.create_blobs_queues()
        # Create one enqueuer thread per GPU
        self._enqueuers = [
            threading.Thread(
                target=self.enqueue_blobs_thread,
                args=(gpu_id, enqueue_blob_names)
            ) for gpu_id in range(self._num_gpus)
        ]

    def start(self, prefill=False):
        for w in self._workers + self._enqueuers:
            w.setDaemon(True)
            w.start()
        if prefill:
            logger.info('Pre-filling mini-batch queue...')
            while not self._minibatch_queue.full():
                logger.info(
                    '  [{:d}/{:d}]'.format(
                        self._minibatch_queue.qsize(),
                        self._minibatch_queue.maxsize
                    )
                )
                time.sleep(0.1)
                # Detect failure and shutdown
                if self.coordinator.should_stop():
                    self.shutdown()
                    break

    def has_stopped(self):
        return self.coordinator.should_stop()

    def shutdown(self):
        self.coordinator.request_stop()
        self.coordinator.wait_for_stop()
        self.close_blobs_queues()
        for w in self._workers + self._enqueuers:
            w.join()

    def create_blobs_queues(self):
        """Create one BlobsQueue for each GPU to hold mini-batches."""
        for gpu_id in range(self._num_gpus):
            with c2_utils.GpuNameScope(gpu_id):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CreateBlobsQueue', [], [self._blobs_queue_name],
                        num_blobs=len(self.get_output_names()),
                        capacity=self._blobs_queue_capacity
                    )
                )
        return self.create_enqueue_blobs()

    def close_blobs_queues(self):
        """Close a BlobsQueue."""
        for gpu_id in range(self._num_gpus):
            with core.NameScope('gpu_{}'.format(gpu_id)):
                workspace.RunOperatorOnce(
                    core.CreateOperator(
                        'CloseBlobsQueue', [self._blobs_queue_name], []
                    )
                )

    def create_enqueue_blobs(self):
        blob_names = self.get_output_names()
        enqueue_blob_names = [
            '{}_enqueue_{}'.format(b, self._loader_id) for b in blob_names
        ]
        for gpu_id in range(self._num_gpus):
            with c2_utils.NamedCudaScope(gpu_id):
                for blob in enqueue_blob_names:
                    workspace.CreateBlob(core.ScopedName(blob))
        return enqueue_blob_names

    def register_sigint_handler(self):
        def signal_handler(signal, frame):
            logger.info(
                'SIGINT: Shutting down RoIDataLoader threads and exiting...'
            )
            self.shutdown()
        signal.signal(signal.SIGINT, signal_handler)
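
# Consumer-side sketch (not part of the original file): the per-GPU
# BlobsQueue filled by enqueue_blobs above is drained by the training net
# with a DequeueBlobs op. This is a hedged approximation of how Detectron's
# model builder wires the loader into a net; `model` is assumed to be a
# Caffe2 ModelHelper-style object with a `net` attribute.
def _demo_add_training_inputs(model, loader):
    blob_names = loader.get_output_names()
    for gpu_id in range(cfg.NUM_GPUS):
        with c2_utils.NamedCudaScope(gpu_id):
            for blob_name in blob_names:
                workspace.CreateBlob(core.ScopedName(blob_name))
            # Each net execution pops one (partial) minibatch for this GPU
            model.net.DequeueBlobs(loader._blobs_queue_name, blob_names)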