예제 #1
0
파일: simple.py 프로젝트: vzinche/neurofire
def _worker_loop(dataset, job_queue: mp.Queue, result_queue: mp.Queue,
                 interrupt_event: mp.Event):
    """Drain ``job_queue``, pushing ``(index, sample)`` pairs to ``result_queue``.

    The loop stops as soon as one of the following happens:

    * ``interrupt_event`` is set (checked before and after each fetch),
    * ``job_queue`` is empty (the fetch is non-blocking, so the queue must be
      fully populated before the worker is started),
    * a ``None`` index (poison pill) is fetched.

    If ``dataset[index]`` raises, the exception is not propagated; instead
    ``(index, ExceptionWrapper(sys.exc_info()))`` is put on ``result_queue``
    and the loop continues with the next job.

    :param dataset: object indexable with the values stored in ``job_queue``
    :param job_queue: queue of indices to sample
    :param result_queue: queue receiving ``(index, sample_or_wrapped_error)``
    :param interrupt_event: event used to request an early stop
    """
    logger = logging.getLogger("worker_loop")
    logger.debug("Worker started.")
    while True:
        logger.debug("Trying to fetch from job_queue.")
        if interrupt_event.is_set():
            logger.debug("Received interrupt signal, breaking.")
            break
        try:
            # Non-blocking on purpose: an empty queue means "no more work".
            index = job_queue.get_nowait()
        except Empty:
            logger.debug("Queue empty, setting up poison pill.")
            index = None
        else:
            logger.debug("Fetch successful.")
        # Re-check the event: it may have been set while we were fetching.
        if index is None or interrupt_event.is_set():
            logger.debug(
                "Fetched poison pill or received interrupt signal, breaking.")
            break
        logger.debug("Sampling index %s from dataset.", index)
        try:
            sample = dataset[index]
        except Exception:
            # Report the failure to the consumer instead of killing the worker.
            logger.debug("Dataset threw an exception at index %s.", index,
                         exc_info=True)
            result_queue.put((index, ExceptionWrapper(sys.exc_info())))
        else:
            logger.debug("Putting sample at index %s in the result queue.",
                         index)
            result_queue.put((index, sample))
예제 #2
0
class sIMapIterator(object):
    """Ordered result iterator with a bounded buffer.

    Producers deliver results with ``_set(i, obj)`` in any order; the consumer
    receives them from ``next()`` in increasing index order. Each ``obj`` is a
    ``(success, value)`` pair: on success ``value`` is returned, otherwise
    ``value`` is raised.

    A semaphore initialised to ``maxsize`` limits how many results may be
    buffered (queued or parked out of order) before ``_set`` blocks.

    NOTE(review): the structure appears modelled on
    ``multiprocessing.pool.IMapIterator`` -- confirm against the caller.
    """

    def __init__(self, cache, maxsize):
        """
        :param cache: mapping in which this iterator registers itself under
            its job id; the entry is removed once all results were delivered
        :param maxsize: maximum number of buffered results
        """
        self._cond = threading.Condition(threading.Lock())

        # One permit per buffered result; released when the consumer takes one.
        self._empty_sema = threading.Semaphore(maxsize)

        # Builtin next() works on both Python 2.6+ and Python 3 iterators.
        self._job = next(job_counter)
        self._cache = cache
        self._items = Queue(maxsize)

        # Index of the next result that may be handed to the consumer.
        self._index = 0
        # Total number of results; unknown until _set_length() is called.
        self._length = None

        # Results that arrived ahead of their turn, keyed by index.
        self._unsorted = {}
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        """Return the next result in index order.

        :param timeout: maximum time in seconds to wait for a result
            (``None`` waits until notified)
        :raises StopIteration: when every result has been consumed
        :raises TimeoutError: when no result became available in time
        :raises Exception: the stored exception of a failed result
        """
        with self._cond:
            try:
                item = self._items.get_nowait()
            except Empty:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                # Re-check without blocking: the lock is held here, so a
                # blocking get() could never be satisfied by _set() and would
                # hang (or double the timeout) after an end-of-data wake-up.
                try:
                    item = self._items.get_nowait()
                except Empty:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            # A buffer slot was freed; let a blocked producer proceed.
            self._empty_sema.release()

        success, value = item
        if success:
            return value
        raise value

    __next__ = next                    # XXX

    def _set(self, i, obj):
        """Store result ``obj`` for index ``i``.

        In-order results are queued immediately, followed by any directly
        succeeding results that were parked earlier. Out-of-order results are
        parked until their predecessors arrive.
        """
        # NOTE(review): a permit is taken even for results parked in
        # ``_unsorted`` and only released by next(); if ``maxsize``
        # out-of-order results arrive before the one next() is waiting for,
        # this call blocks with nothing left to consume -- confirm callers
        # bound the amount of reordering.
        self._empty_sema.acquire()
        with self._cond:
            if self._index == i:
                self._items.put_nowait(obj)
                self._index += 1
                # Flush results that were waiting for this one.
                while self._index in self._unsorted:
                    obj = self._unsorted.pop(self._index)
                    self._items.put_nowait(obj)
                    self._index += 1
                self._cond.notify()
            else:
                self._unsorted[i] = obj

            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        """Record the total number of results.

        If all results were already delivered, wake a waiting consumer so it
        can raise ``StopIteration`` and drop this iterator from the cache.
        """
        with self._cond:
            self._length = length
            if self._index == self._length:
                self._cond.notify()
                del self._cache[self._job]
예제 #3
0
class DataLoader(object):
    """Iterator over preprocessed minibatches produced by a background process.

    A daemon process runs ``_run``, which loads and preprocesses the images
    of each minibatch and pushes the result on a bounded queue. Iterating the
    loader pops from that queue; a ``None`` item marks the end of one pass
    over the minibatches and is turned into ``StopIteration``.
    """

    def __init__(self,
                 minibatchlist,
                 images_path,
                 n_workers=1,
                 multi_view=False,
                 use_triplets=False,
                 infinite_loop=True,
                 max_queue_len=4,
                 is_training=False,
                 apply_occlusion=False,
                 occlusion_percentage=0.5):
        """
        A Custom dataloader to work with our datasets, and to prepare data for the different models
        (inverse, priors, autoencoder, ...)

        :param minibatchlist: ([np.array]) list of observations indices (grouped per minibatch)
        :param images_path: (np.array) Array of path to images
        :param n_workers: (int) number of preprocessing worker (load and preprocess each image)
        :param multi_view: (bool) load two views (``_1.jpg`` and ``_2.jpg``) per observation
        :param use_triplets: (bool) with ``multi_view``, also load a randomly chosen
            negative observation
        :param infinite_loop: (bool) Set to True, the producer starts a new pass over the
            minibatches after each end-of-pass marker, so the loader can be iterated again.
            Set to False, it does a single pass.
        :param max_queue_len: (int) Max number of minibatches that can be preprocessed at the same time
        :param apply_occlusion: is the use of occlusion enabled - when using DAE (bool)
        :param occlusion_percentage: max percentage of occlusion when using DAE (float)
        :param is_training: (bool)
            Set to True, minibatches are visited in random order and the dataloader outputs
            a tuple ``(minibatch_idx, obs, next_obs, obs_noisy, next_obs_noisy)`` of th.Tensor
            (the noisy entries are None unless ``apply_occlusion`` is set).
            Set to False, it will only output one th.Tensor.
        """
        super(DataLoader, self).__init__()
        self.n_workers = n_workers
        self.infinite_loop = infinite_loop
        self.n_minibatches = len(minibatchlist)
        self.minibatchlist = minibatchlist
        self.images_path = images_path
        self.shuffle = is_training
        self.queue = Queue(max_queue_len)
        self.process = None
        self.use_triplets = use_triplets
        self.multi_view = multi_view
        # apply occlusion for training a DAE
        self.apply_occlusion = apply_occlusion
        self.occlusion_percentage = occlusion_percentage
        self.startProcess()

    @staticmethod
    def createTestMinibatchList(n_samples, batch_size):
        """
        Create list of minibatch for plotting

        Indices ``0 .. n_samples - 1`` are split into consecutive chunks of
        ``batch_size``; only the last chunk may be shorter and no empty chunk
        is produced.

        :param n_samples: (int)
        :param batch_size: (int)
        :return: ([np.array])
        """
        # Ceiling division: avoids a trailing empty minibatch when
        # n_samples is an exact multiple of batch_size.
        n_batches = (n_samples + batch_size - 1) // batch_size
        return [
            np.arange(i * batch_size, min(n_samples, (i + 1) * batch_size))
            for i in range(n_batches)
        ]

    def startProcess(self):
        """Start preprocessing process"""
        self.process = Process(target=self._run)
        # Make it a daemon, so it will be deleted at the same time
        # of the main process
        self.process.daemon = True
        self.process.start()

    def _loadBatch(self, parallel, images, **preprocess_kwargs):
        """Load every path in ``images`` and concatenate along the batch axis.

        :param parallel: joblib ``Parallel`` instance, used when ``n_workers > 1``
        :param images: iterable of image paths
        :param preprocess_kwargs: extra keyword arguments for ``_makeBatchElement``
        :return: (th.Tensor)
        """
        if self.n_workers <= 1:
            elements = [
                self._makeBatchElement(image_path, self.multi_view,
                                       self.use_triplets, **preprocess_kwargs)
                for image_path in images
            ]
        else:
            elements = parallel(
                delayed(self._makeBatchElement)(image_path, self.multi_view,
                                                self.use_triplets,
                                                **preprocess_kwargs)
                for image_path in images)
        return th.cat(elements, dim=0)

    def _run(self):
        """Producer loop executed in the background process.

        For every minibatch the images are loaded and the result is put on
        ``self.queue``; after each full pass ``None`` is put as end marker.
        The loop repeats forever when ``self.infinite_loop`` is set.
        """
        start = True
        with Parallel(n_jobs=self.n_workers,
                      batch_size="auto",
                      backend="threading") as parallel:
            while start or self.infinite_loop:
                start = False

                if self.shuffle:
                    indices = np.random.permutation(self.n_minibatches).astype(
                        np.int64)
                else:
                    indices = np.arange(len(self.minibatchlist),
                                        dtype=np.int64)

                for minibatch_idx in indices:
                    obs_indices = self.minibatchlist[minibatch_idx]
                    if self.shuffle:
                        # First half: observations, second half: the
                        # observations stored right after them (index + 1).
                        images = np.stack(
                            (self.images_path[obs_indices],
                             self.images_path[obs_indices + 1])).flatten()
                    else:
                        images = self.images_path[obs_indices]

                    batch = self._loadBatch(parallel, images)
                    batch_noisy = None
                    if self.apply_occlusion:
                        batch_noisy = self._loadBatch(
                            parallel,
                            images,
                            apply_occlusion=self.apply_occlusion,
                            occlusion_percentage=self.occlusion_percentage)

                    if self.shuffle:
                        half = len(images) // 2
                        batch_obs, batch_next_obs = batch[:half], batch[half:]
                        batch_obs_noisy, batch_next_obs_noisy = None, None
                        if batch_noisy is not None:
                            batch_obs_noisy = batch_noisy[:half]
                            batch_next_obs_noisy = batch_noisy[half:]
                        self.queue.put(
                            (minibatch_idx, batch_obs, batch_next_obs,
                             batch_obs_noisy, batch_next_obs_noisy))
                        # Free memory
                        del batch_obs, batch_next_obs
                        del batch_obs_noisy, batch_next_obs_noisy
                    else:
                        self.queue.put(batch)

                    # Free memory
                    del batch
                    del batch_noisy

                # End-of-pass marker, turned into StopIteration by __next__
                self.queue.put(None)

    @classmethod
    def _makeBatchElement(cls,
                          image_path,
                          multi_view=False,
                          use_triplets=False,
                          apply_occlusion=False,
                          occlusion_percentage=None):
        """
        Load and preprocess the image(s) of one observation.

        :param image_path: (str) path to an image (without the 'data/' prefix)
        :param multi_view: (bool) load the ``_1.jpg`` and ``_2.jpg`` views and
            stack them along the last axis
        :param use_triplets: (bool) with ``multi_view``, additionally stack the
            first view of a randomly chosen other timestep of the same record
        :param apply_occlusion: (bool) forwarded to ``preprocessImage``
        :param occlusion_percentage: (float) forwarded to ``preprocessImage``
        :return: (th.Tensor)
        :raises ValueError: if an image file cannot be read
        """
        # Keep only what precedes the first '.jpg' and add the data folder
        image_path = 'data/' + image_path.split('.jpg')[0]

        if multi_view:
            images = []

            # Load different view of the same timestep
            for i in range(2):
                im = cv2.imread("{}_{}.jpg".format(image_path, i + 1))
                if im is None:
                    raise ValueError(
                        "tried to load {}_{}.jpg, but it was not found".format(
                            image_path, i + 1))
                images.append(
                    preprocessImage(im,
                                    apply_occlusion=apply_occlusion,
                                    occlusion_percentage=occlusion_percentage))
            ####################
            # loading a negative observation
            if use_triplets:
                # End of file format for positive & negative observations (camera 1) - length : 6 characters
                extra_chars = '_1.jpg'

                # getting path for all files of same record episode, e.g path_to_data/record_001/frame[0-9]{6}*
                digits_path = glob.glob(image_path[:-6] + '[0-9]*' +
                                        extra_chars)

                # getting the current & all frames' timesteps
                current = int(image_path[-6:])
                # For all others extract last 6 digits (timestep) after removing the extra chars
                all_frame_steps = [
                    int(k[:-len(extra_chars)][-6:]) for k in digits_path
                ]
                # removing current positive timestep from the list
                all_frame_steps.remove(current)

                # negative timestep by random sampling
                length_set_steps = len(all_frame_steps)
                negative = all_frame_steps[random.randint(
                    0, length_set_steps - 1)]
                negative_path = '{}{:06d}'.format(image_path[:-6], negative)

                im3 = cv2.imread(negative_path + "_1.jpg")
                if im3 is None:
                    raise ValueError(
                        "tried to load {}_{}.jpg, but it was not found".format(
                            negative_path, 1))
                # The negative observation is preprocessed without occlusion
                im3 = preprocessImage(im3)
                # stacking along channels
                images.append(im3)

            im = np.dstack(images)
        else:
            im = cv2.imread("{}.jpg".format(image_path))
            if im is None:
                raise ValueError(
                    "tried to load {}.jpg, but it was not found".format(
                        image_path))

            im = preprocessImage(im,
                                 apply_occlusion=apply_occlusion,
                                 occlusion_percentage=occlusion_percentage)

        # Channel first (for pytorch convolutions) + one dim for the batch:
        # an array of shape (d0, d1, d2) becomes (1, d2, d1, d0), i.e. the
        # two leading axes are swapped as well.
        # th.tensor creates a copy
        im = th.tensor(im.reshape((1, ) + im.shape).transpose(0, 3, 2, 1))
        return im

    def __len__(self):
        """Return the number of minibatches in one pass."""
        return self.n_minibatches

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next preprocessed minibatch, waiting until one is ready.

        :raises StopIteration: when the end-of-pass marker (None) is received
        """
        # Blocking get instead of polling get_nowait() every millisecond.
        val = self.queue.get()
        if val is None:
            raise StopIteration
        return val

    next = __next__  # Python 2 compatibility

    def __del__(self):
        # ``process`` is missing if __init__ failed before assigning it.
        process = getattr(self, "process", None)
        if process is not None:
            process.terminate()