def _worker_loop(dataset, job_queue: mp.Queue, result_queue: mp.Queue, interrupt_event: mp.Event):
    """Drain ``job_queue`` and push the corresponding samples to ``result_queue``.

    The loop repeatedly takes an index from ``job_queue`` without blocking,
    evaluates ``dataset[index]`` and puts ``(index, sample)`` on
    ``result_queue``. If indexing the dataset raises, ``(index,
    ExceptionWrapper(sys.exc_info()))`` is put instead, so the failure is
    reported to the consumer rather than killing the worker.

    The worker stops as soon as one of the following happens:

    * ``interrupt_event`` is set (checked before and after the fetch),
    * ``None`` is fetched from ``job_queue`` (explicit poison pill),
    * ``job_queue`` is empty (treated exactly like a poison pill).

    :param dataset: object supporting ``dataset[index]``
    :param job_queue: queue of indices to sample; ``None`` terminates the worker
    :param result_queue: queue receiving ``(index, sample_or_wrapped_exception)``
    :param interrupt_event: event used to ask the worker to stop early
    """
    logger = logging.getLogger("worker_loop")
    logger.debug("Worker started.")
    while True:
        logger.debug("Trying to fetch from job_queue.")
        if interrupt_event.is_set():
            logger.debug("Received interrupt signal, breaking.")
            break
        try:
            # This assumes that the job_queue is fully populated before the
            # worker is started: an empty queue is interpreted as "no more
            # work", not as "work not yet available".
            index = job_queue.get_nowait()
            logger.debug("Fetch successful.")
        except Empty:
            logger.debug("Queue empty, setting up poison pill.")
            index = None
        # The event is re-checked because it may have been set while fetching.
        if index is None or interrupt_event.is_set():
            logger.debug(
                "Fetched poison pill or received interrupt signal, breaking.")
            break
        try:
            logger.debug("Sampling index %s from dataset.", index)
            sample = dataset[index]
        except Exception:
            # Broad on purpose: any dataset failure is forwarded to the
            # consumer through the result queue instead of being swallowed.
            logger.debug("Dataset threw an exception at index %s.", index,
                         exc_info=True)
            result_queue.put((index, ExceptionWrapper(sys.exc_info())))
        else:
            logger.debug("Putting sample at index %s in the result queue.",
                         index)
            result_queue.put((index, sample))
class sIMapIterator(object):
    """Ordered result iterator with a bounded number of pending results.

    Producers deliver results with ``_set(i, obj)`` in any order; the consumer
    receives them through ``next()`` strictly in index order. Every result is
    a ``(success, value)`` pair: ``value`` is returned when ``success`` is
    true and raised otherwise.

    A semaphore initialised to ``maxsize`` is acquired by every ``_set`` call
    and released each time ``next()`` takes an item, so at most ``maxsize``
    results (queued or parked out of order) are held at any time.

    NOTE(review): because out-of-order results also consume a permit, a
    producer holding the next in-order result can block in ``_set`` forever
    if ``maxsize`` later results were delivered first -- confirm that callers
    bound the amount of reordering.
    """

    def __init__(self, cache, maxsize):
        """Register a new iterator in ``cache`` under a fresh job id.

        :param cache: mapping from job id to iterator; the entry is removed
            once all results have been delivered
        :param maxsize: maximum number of results held before ``_set`` blocks
        """
        self._cond = threading.Condition(threading.Lock())
        self._empty_sema = threading.Semaphore(maxsize)
        # Builtin next() works on both Python 2.6+ and Python 3 iterators,
        # whereas the ``.next()`` method only exists on Python 2.
        self._job = next(job_counter)
        self._cache = cache
        self._items = Queue(maxsize)
        self._index = 0       # index of the next result to hand to the queue
        self._length = None   # total number of results, unknown until _set_length
        self._unsorted = {}   # results that arrived ahead of their turn
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        """Return the next result in index order.

        :param timeout: maximum time in seconds to wait for a result, or
            ``None`` to wait until a producer signals
        :raises StopIteration: when all results have been consumed
        :raises TimeoutError: when no result became available after waiting
        :raises Exception: the stored value, if the result was a failure
        """
        with self._cond:
            try:
                item = self._items.get_nowait()
            except Empty:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                try:
                    # Must not block here: the condition lock is held, so no
                    # producer could put an item while we wait, and a wake-up
                    # from _set_length on an exhausted stream would hang.
                    item = self._items.get_nowait()
                except Empty:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            # One slot is free again for the producers.
            self._empty_sema.release()

        success, value = item
        if success:
            return value
        raise value

    __next__ = next  # XXX

    def _set(self, i, obj):
        """Deliver result ``obj`` for index ``i`` (called by producers).

        Blocks while ``maxsize`` results are already pending. A result that
        is not the next expected one is parked until its predecessors arrive.
        """
        self._empty_sema.acquire()
        with self._cond:
            if self._index == i:
                self._items.put_nowait(obj)
                self._index += 1
                # Flush every parked result that is now in order.
                while self._index in self._unsorted:
                    obj = self._unsorted.pop(self._index)
                    self._items.put_nowait(obj)
                    self._index += 1
                self._cond.notify()
            else:
                self._unsorted[i] = obj

            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        """Record the total number of results once it is known.

        If every result has already been queued, a waiting consumer is woken
        up (so it can stop) and the iterator is removed from the cache.
        """
        with self._cond:
            self._length = length
            if self._index == self._length:
                self._cond.notify()
                del self._cache[self._job]
class DataLoader(object):
    """Iterator over minibatches that are preprocessed in a background process."""

    def __init__(self, minibatchlist, images_path, n_workers=1, multi_view=False, use_triplets=False,
                 infinite_loop=True, max_queue_len=4, is_training=False, apply_occlusion=False,
                 occlusion_percentage=0.5):
        """
        A Custom dataloader to work with our datasets, and to prepare data for the different models
        (inverse, priors, autoencoder, ...)

        :param minibatchlist: ([np.array]) list of observations indices (grouped per minibatch)
        :param images_path: (np.array) Array of path to images
        :param n_workers: (int) number of preprocessing worker (load and preprocess each image)
        :param multi_view: (bool) load two views per timestep and stack them along the channels
        :param use_triplets: (bool) with `multi_view`, also stack a randomly chosen negative observation
        :param infinite_loop: (bool) whether to have an iterator that can be reset.
            Set to True, the preprocessing process keeps producing passes over the minibatches
            (each pass is terminated by a `StopIteration`);
            set to False, it stops after a single pass.
        :param max_queue_len: (int) Max number of minibatches that can be preprocessed at the same time
        :param apply_occlusion: is the use of occlusion enabled - when using DAE (bool)
        :param occlusion_percentage: max percentage of occlusion when using DAE (float)
        :param is_training: (bool)
            Set to True, minibatches are shuffled and the dataloader outputs tuples
            `(minibatch_idx, obs, next_obs, obs_noisy, next_obs_noisy)`
            (the noisy entries are None unless `apply_occlusion` is set).
            Set to false, it will only output one th.Tensor.
        """
        super(DataLoader, self).__init__()
        # Assigned first so that __del__ stays safe if construction fails part-way.
        self.process = None
        self.n_workers = n_workers
        self.infinite_loop = infinite_loop
        self.n_minibatches = len(minibatchlist)
        self.minibatchlist = minibatchlist
        self.images_path = images_path
        self.shuffle = is_training
        self.queue = Queue(max_queue_len)
        self.use_triplets = use_triplets
        self.multi_view = multi_view
        # apply occlusion for training a DAE
        self.apply_occlusion = apply_occlusion
        self.occlusion_percentage = occlusion_percentage
        self.startProcess()

    @staticmethod
    def createTestMinibatchList(n_samples, batch_size):
        """
        Create list of minibatch for plotting.
        The last minibatch is shorter when `n_samples` is not a multiple of `batch_size`;
        no empty minibatch is ever produced.

        :param n_samples: (int)
        :param batch_size: (int)
        :return: ([np.array])
        """
        # Stepping over the start indices avoids the empty trailing minibatch
        # that `n_samples // batch_size + 1` yields for exact multiples.
        return [np.arange(start_idx, min(n_samples, start_idx + batch_size))
                for start_idx in range(0, n_samples, batch_size)]

    def startProcess(self):
        """Start preprocessing process"""
        self.process = Process(target=self._run)
        # Make it a daemon, so it will be deleted at the same time
        # as the main process
        self.process.daemon = True
        self.process.start()

    def _run(self):
        """
        Body of the preprocessing process: build every minibatch and put it in the queue.
        A `None` is queued after each full pass to mark the end of the iteration.
        """
        start = True
        with Parallel(n_jobs=self.n_workers, batch_size="auto", backend="threading") as parallel:
            while start or self.infinite_loop:
                start = False

                if self.shuffle:
                    indices = np.random.permutation(self.n_minibatches).astype(np.int64)
                else:
                    indices = np.arange(len(self.minibatchlist), dtype=np.int64)

                for minibatch_idx in indices:
                    batch_noisy, batch_obs_noisy, batch_next_obs_noisy = None, None, None
                    obs_indices = self.minibatchlist[minibatch_idx]
                    if self.shuffle:
                        # First half: observations, second half: next observations
                        images = np.stack((self.images_path[obs_indices],
                                           self.images_path[obs_indices + 1]))
                        images = images.flatten()
                    else:
                        images = self.images_path[obs_indices]

                    if self.n_workers <= 1:
                        batch = [self._makeBatchElement(image_path, self.multi_view, self.use_triplets)
                                 for image_path in images]
                        if self.apply_occlusion:
                            batch_noisy = [
                                self._makeBatchElement(image_path, self.multi_view, self.use_triplets,
                                                       apply_occlusion=self.apply_occlusion,
                                                       occlusion_percentage=self.occlusion_percentage)
                                for image_path in images]
                    else:
                        batch = parallel(
                            delayed(self._makeBatchElement)(image_path, self.multi_view, self.use_triplets)
                            for image_path in images)
                        if self.apply_occlusion:
                            batch_noisy = parallel(
                                delayed(self._makeBatchElement)(image_path, self.multi_view, self.use_triplets,
                                                                apply_occlusion=self.apply_occlusion,
                                                                occlusion_percentage=self.occlusion_percentage)
                                for image_path in images)

                    batch = th.cat(batch, dim=0)
                    if self.apply_occlusion:
                        batch_noisy = th.cat(batch_noisy, dim=0)

                    if self.shuffle:
                        half = len(images) // 2
                        batch_obs, batch_next_obs = batch[:half], batch[half:]
                        if batch_noisy is not None:
                            batch_obs_noisy, batch_next_obs_noisy = batch_noisy[:half], batch_noisy[half:]
                        self.queue.put((minibatch_idx, batch_obs, batch_next_obs,
                                        batch_obs_noisy, batch_next_obs_noisy))
                    else:
                        self.queue.put(batch)

                    # Free memory
                    if self.shuffle:
                        del batch_obs
                        del batch_next_obs
                        if batch_noisy is not None:
                            del batch_obs_noisy
                            del batch_next_obs_noisy
                    del batch
                    del batch_noisy

                # End-of-pass marker, turned into StopIteration by __next__
                self.queue.put(None)

    @classmethod
    def _makeBatchElement(cls, image_path, multi_view=False, use_triplets=False, apply_occlusion=False,
                          occlusion_percentage=None):
        """
        Load and preprocess the image(s) of one observation.

        :param image_path: (str) path to an image (without the 'data/' prefix)
        :param multi_view: (bool) load the `_1` and `_2` views and stack them along the channels
        :param use_triplets: (bool) with `multi_view`, also stack a negative observation
            (camera 1, another timestep of the same record, sampled at random)
        :param apply_occlusion: (bool) forwarded to `preprocessImage` (not applied to the negative observation)
        :param occlusion_percentage: (float) forwarded to `preprocessImage`
        :return: (th.Tensor) tensor with a leading batch dimension of size 1
        :raises ValueError: if an image cannot be read
        """
        # Remove trailing .jpg if present
        image_path = 'data/' + image_path.split('.jpg')[0]

        if multi_view:
            images = []
            # Load different view of the same timestep
            for i in range(2):
                im = cv2.imread("{}_{}.jpg".format(image_path, i + 1))
                if im is None:
                    raise ValueError("tried to load {}_{}.jpg, but it was not found".format(image_path, i + 1))
                images.append(preprocessImage(im, apply_occlusion=apply_occlusion,
                                              occlusion_percentage=occlusion_percentage))
            ####################
            # loading a negative observation
            if use_triplets:
                # End of file format for positive & negative observations (camera 1) - length : 6 characters
                extra_chars = '_1.jpg'

                # getting path for all files of same record episode, e.g path_to_data/record_001/frame[0-9]{6}*
                digits_path = glob.glob(image_path[:-6] + '[0-9]*' + extra_chars)

                # getting the current & all frames' timesteps
                current = int(image_path[-6:])
                # For all others extract last 6 digits (timestep) after removing the extra chars
                all_frame_steps = [int(k[:-len(extra_chars)][-6:]) for k in digits_path]
                # removing current positive timestep from the list
                all_frame_steps.remove(current)

                # negative timestep by random sampling
                length_set_steps = len(all_frame_steps)
                negative = all_frame_steps[random.randint(0, length_set_steps - 1)]
                negative_path = '{}{:06d}'.format(image_path[:-6], negative)

                im3 = cv2.imread(negative_path + "_1.jpg")
                if im3 is None:
                    raise ValueError("tried to load {}_{}.jpg, but it was not found".format(negative_path, 1))
                im3 = preprocessImage(im3)
                # stacking along channels
                images.append(im3)

            im = np.dstack(images)
        else:
            im = cv2.imread("{}.jpg".format(image_path))
            if im is None:
                raise ValueError("tried to load {}.jpg, but it was not found".format(image_path))
            im = preprocessImage(im, apply_occlusion=apply_occlusion, occlusion_percentage=occlusion_percentage)

        # Channel first (for pytorch convolutions) + one dim for the batch
        # NOTE(review): the permutation (0, 3, 2, 1) also swaps the two spatial axes - confirm this is intended
        # th.tensor creates a copy
        im = th.tensor(im.reshape((1,) + im.shape).transpose(0, 3, 2, 1))
        return im

    def __len__(self):
        return self.n_minibatches

    def __iter__(self):
        return self

    def __next__(self):
        """
        Return the next preprocessed minibatch, polling the queue until one is available.

        :raises StopIteration: when the end-of-pass marker (None) is received
        """
        while True:
            try:
                val = self.queue.get_nowait()
                break
            except queue.Empty:
                time.sleep(0.001)
                continue
        if val is None:
            raise StopIteration
        return val

    next = __next__  # Python 2 compatibility

    def __del__(self):
        # `process` may be missing if __init__ raised before assigning it
        process = getattr(self, 'process', None)
        if process is not None:
            process.terminate()