def __init__(
    self,
    roidb,
    num_loaders=4,
    minibatch_queue_size=64,
    blobs_queue_capacity=8
):
    self._roidb = roidb
    self._lock = threading.Lock()
    self._perm = deque(range(len(self._roidb)))
    self._cur = 0  # _perm cursor
    # The minibatch queue holds prepared training data in host (CPU) memory.
    # When training with N > 1 GPUs, each element in the minibatch queue is
    # actually a partial minibatch, which contributes 1 / N of the examples
    # to the overall minibatch.
    self._minibatch_queue = Queue.Queue(maxsize=minibatch_queue_size)
    self._blobs_queue_capacity = blobs_queue_capacity
    # Random queue name in case one instantiates multiple RoIDataLoaders
    self._loader_id = uuid.uuid4()
    self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
    # Loader threads construct (partial) minibatches and put them on the
    # minibatch queue
    self._num_loaders = num_loaders
    self._num_gpus = cfg.NUM_GPUS
    self.coordinator = Coordinator()
    self._output_names = get_minibatch_blob_names()
    self._shuffle_roidb_inds()
    self.create_threads()
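For reference, here is a minimal, self-contained sketch of the loader-thread pattern this constructor sets up: several threads build (partial) minibatches and push them onto a bounded host-memory queue, while a coordinator-style stop signal shuts them down. The names build_minibatch, loader_thread, and stop_event are hypothetical stand-ins for illustration, not the repo's API.

import queue
import threading

stop_event = threading.Event()           # plays the role of the Coordinator
minibatch_queue = queue.Queue(maxsize=64)  # bounded host (CPU) memory queue

def build_minibatch(i):
    # Hypothetical stand-in for real blob construction from the roidb
    return {'data': i, 'labels': i % 2}

def loader_thread():
    i = 0
    while not stop_event.is_set():
        blobs = build_minibatch(i)
        try:
            # Bounded put() gives backpressure; the timeout lets the thread
            # notice the stop signal instead of blocking forever
            minibatch_queue.put(blobs, timeout=1.0)
            i += 1
        except queue.Full:
            continue

threads = [threading.Thread(target=loader_thread, daemon=True) for _ in range(4)]
for t in threads:
    t.start()
for _ in range(8):  # consumer: drain a few minibatches
    print(minibatch_queue.get())
stop_event.set()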
def collate_minibatch(list_of_blobs):
    """Stack samples separately and return a list of minibatches.

    A batch contains NUM_GPUS minibatches, and the image size may differ
    across minibatches. Hence, we need to stack the samples from each
    minibatch separately.
    """
    if cfg.RPN.RPN_ON:
        Batch = {key: [] for key in list_of_blobs[0]}
        # Because roidb consists of entries of variable length, it can't be
        # batched into a tensor, so we keep roidb as a list of ndarrays.
        list_of_roidb = [blobs.pop('roidb') for blobs in list_of_blobs]
        for i in range(0, len(list_of_blobs), cfg.TRAIN.IMS_PER_BATCH):
            mini_list = list_of_blobs[i:(i + cfg.TRAIN.IMS_PER_BATCH)]
            # Pad image data so all images in the minibatch share one shape
            mini_list = pad_image_data(mini_list)
            minibatch = default_collate(mini_list)
            minibatch['roidb'] = list_of_roidb[i:(i + cfg.TRAIN.IMS_PER_BATCH)]
            for key in minibatch:
                Batch[key].append(minibatch[key])
        return Batch
    else:
        Batch = {key: [] for key in get_minibatch_blob_names()}
        for i in range(0, len(list_of_blobs), cfg.TRAIN.IMS_PER_BATCH):
            roidb = list_of_blobs[i:(i + cfg.TRAIN.IMS_PER_BATCH)]
            blobs, valid = get_minibatch(roidb)  # `valid` is unused here
            blobs['data'] = torch.from_numpy(blobs['data'])
            for key in blobs:
                Batch[key].append(blobs[key])
        return Batch
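A hedged usage sketch: a function like this is typically wired in as the collate_fn of a torch.utils.data.DataLoader, so that each fetched batch of cfg.NUM_GPUS * cfg.TRAIN.IMS_PER_BATCH samples is regrouped into per-GPU minibatch lists. The `dataset` object, the batch_size expression, and num_workers below are assumptions for illustration, not taken from the source.

import torch.utils.data

# Assumption: `dataset` yields the per-image blob dicts collate_minibatch expects
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=cfg.NUM_GPUS * cfg.TRAIN.IMS_PER_BATCH,  # assumed: one full batch
    num_workers=4,
    collate_fn=collate_minibatch,
)
for batch in dataloader:
    # Each value in `batch` is a list with one entry per (partial) minibatch
    break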
def __init__(self, roidb, num_workers=4, num_enqueuers=1,
             minibatch_queue_size=64, blobs_queue_capacity=8):
    self._roidb = roidb
    self._lock = multiprocessing.Lock()
    self._perm = np.arange(len(self._roidb))
    self._cur = 0  # _perm cursor
    # The minibatch queue holds prepared training data in host (CPU) memory.
    # When training with N > 1 GPUs, each element in the minibatch queue is
    # actually a partial minibatch which contributes 1 / N examples to the
    # overall minibatch.
    self._manager = multiprocessing.Manager()
    # Use a multiprocessing.Manager().Queue() instead of a plain
    # multiprocessing.Queue, because the latter hangs on exit during
    # worker.join(). Idea from https://stackoverflow.com/a/33153048
    self._minibatch_queue = self._manager.Queue(
        maxsize=minibatch_queue_size)
    self._minibatch_queue_maxsize = minibatch_queue_size
    self._blobs_queue_capacity = blobs_queue_capacity
    # Random identifier to deal with multiple RoIDataLoader instances
    self._loader_id = uuid.uuid4()
    self._blobs_queue_name = 'roi_blobs_queue_{}'.format(self._loader_id)
    # "worker" threads construct (partial) minibatches and put them on the
    # minibatch queue
    self._num_workers = num_workers
    # "enqueuer" threads get (partial) minibatches from the minibatch queue
    # and enqueue them on GPU blob queues
    self._num_enqueuers = num_enqueuers
    self._num_gpus = cfg.NUM_GPUS
    self.coordinator = Coordinator()
    self._output_names = get_minibatch_blob_names()
    self._perm, self._cur = self._shuffle_roidb_inds(self._roidb)
    # The FOLLOWING is the state needed by the multiprocessing module; it is
    # kept in __init__ so that minibatch_loader2 can be run in debug mode
    # from train_net.py.
    # These variables cannot be shared through the class itself, so they
    # must be shared explicitly to work with multiprocessing. A
    # multiprocessing.Manager() would work but is slow, so use a normal dict
    # for the read-only data and separate synchronized variables for the
    # read-write data. Note that the following dict is NOT shared: a copy
    # will exist in each worker, so each worker can read it, but any
    # modifications stay local. This is fine because only read-only objects
    # are added to it.
    self.shared_readonly_dict = {}
    # No need to synchronize the following entries, since they are never
    # modified in the worker processes (only read), so a simple dictionary
    # is enough.
    self.shared_readonly_dict['output_names'] = self.get_output_names()
    self.shared_readonly_dict['roidb'] = self._roidb
    # The following will be modified, but always while holding self._lock,
    # so non-locking synchronized variables are good enough.
    self.mp_cur = multiprocessing.Value('i', self._cur, lock=False)
    self.mp_perm = multiprocessing.Array('i', self._perm.tolist(), lock=False)
    self.create_threads()
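A sketch, under stated assumptions rather than the repo's exact code, of how a worker process could consume the shared permutation: since mp_cur and mp_perm were created with lock=False, every read-modify-write must happen while holding self._lock, exactly as the comment above promises. The function name and the end-of-epoch reshuffle here are assumptions.

import numpy as np

def get_next_minibatch_inds(lock, mp_cur, mp_perm, ims_per_batch):
    """Advance the shared cursor under the lock and return the next indices."""
    with lock:
        roidb_size = len(mp_perm)
        if mp_cur.value + ims_per_batch > roidb_size:
            # Epoch boundary: reshuffle the shared permutation, reset cursor
            mp_perm[:] = np.random.permutation(roidb_size).tolist()
            mp_cur.value = 0
        inds = list(mp_perm[mp_cur.value:mp_cur.value + ims_per_batch])
        mp_cur.value += ims_per_batch
    return inds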