Example #1
0
    def infer(self, requests: Iterable[ObjectProvider]) -> Iterable[ResultProvider]:
        """Score each request, yielding results as they become available.

        Loading is fanned out to a spawn-based worker pool; a bounded
        semaphore keeps the loaders from running too far ahead of the
        consumer.
        """
        # Back-pressure: at most 256 loaded-but-unscored items in flight.
        load_limiter = threading.Semaphore(256)

        ctx = mp.get_context('spawn')
        worker_count = min(16, mp.cpu_count())
        worker_state = (self._feature_provider.feature_extractor,
                        self._feature_provider.cache)
        with ctx.Pool(worker_count,
                      initializer=set_worker_feature_provider,
                      initargs=worker_state) as pool:
            loaded = pool.imap_unordered(
                load_from_content, bounded_iter(requests, load_limiter))
            yield from self._infer_inner(loaded, load_limiter)
Example #2
0
    def _get_example_features(
            self, example_dir: Path) -> Dict[str, List[List[float]]]:
        """Collect feature vectors for every example under *example_dir*,
        grouped by label.

        Files are discovered via ``example_dir.glob('*/*')`` — presumably
        ``<label>/<file>``; confirm against load_from_path. Loading runs in
        a spawn-based worker pool; a background thread batches uncached
        images through the feature extractor while cached features are
        forwarded directly, and a queue feeds everything back to this
        (consuming) thread.

        Returns:
            Mapping from label to the list of feature vectors seen for it.
        """
        semaphore = threading.Semaphore(
            256
        )  # Make sure that the load function doesn't overload the consumer

        with mp.get_context('spawn').Pool(
                min(4, mp.cpu_count()),
                initializer=set_worker_feature_provider,
                initargs=(self.feature_provider.feature_extractor,
                          self.feature_provider.cache)) as pool:
            # Each pool result is (label, should_process, payload): when
            # should_process is True the payload is an (image, cache_key)
            # pair still needing extraction; otherwise it is forwarded to
            # the queue as-is (presumably an already-cached feature —
            # verify against load_from_path).
            images = pool.imap_unordered(
                load_from_path, bounded_iter(example_dir.glob('*/*'),
                                             semaphore))
            # Hand-off channel from the producer thread below to the
            # consuming loop; a trailing None marks end-of-stream.
            feature_queue = queue.Queue()

            @log_exceptions
            def process_uncached():
                # Producer: drain the pool's output, forwarding cached
                # payloads directly and pushing uncached images through the
                # extractor in BATCH_SIZE groups.
                cached = 0
                uncached = 0
                batch = []
                for label, should_process, payload in images:
                    # Free a loader slot as soon as the item is consumed.
                    semaphore.release()
                    if should_process:
                        image, key = payload
                        batch.append(
                            (label, torch.from_numpy(image).to(
                                self.feature_provider.device,
                                non_blocking=True), key))
                        if len(batch) == BATCH_SIZE:
                            self._process_batch(batch, feature_queue)
                            batch = []
                        uncached += 1
                    else:
                        feature_queue.put((label, payload))
                        cached += 1

                # Flush the final partial batch, if any.
                if len(batch) > 0:
                    self._process_batch(batch, feature_queue)

                logger.info(
                    '{} cached examples, {} new examples preprocessed'.format(
                        cached, uncached))
                # Sentinel: tells to_iter() in the consumer to stop.
                feature_queue.put(None)

            threading.Thread(target=process_uncached,
                             name='process-uncached-trainer').start()

            # Consumer: group (label, feature) pairs by label until the
            # sentinel arrives.
            i = 0
            features = defaultdict(list)
            for feature in to_iter(feature_queue):
                i += 1
                features[feature[0]].append(feature[1])

            logger.info('Retrieved {} feature vectors'.format(i))

            return features
Example #3
0
    def infer_dir(self, directory: Path, callback_fn: Callable[[int, float], None]) -> None:
        """Run inference over every file matching ``directory/*/*``.

        For each scored result, ``callback_fn`` receives the target label
        (parsed from the result id's parent-directory component) and the
        score. Progress is logged every 1000 examples.
        """
        # Back-pressure: at most 256 loaded-but-unscored items in flight.
        limiter = threading.Semaphore(256)

        ctx = mp.get_context('spawn')
        with ctx.Pool(min(16, mp.cpu_count()),
                      initializer=set_worker_feature_provider,
                      initargs=(self._feature_provider.feature_extractor,
                                self._feature_provider.cache)) as pool:
            paths = bounded_iter(directory.glob('*/*'), limiter)
            images = pool.imap_unordered(load_from_path, paths)

            scored = 0
            for result in self._infer_inner(images, limiter):
                scored += 1
                # TODO(hturki): Should we get the target label in a less hacky way?
                callback_fn(int(result.id.split('/')[-2]), result.score)
                if scored % 1000 == 0:
                    logger.info('{} examples scored so far'.format(scored))
Example #4
0
    def infer(self, requests: Iterable[ObjectProvider]) -> Iterable[ResultProvider]:
        """Preprocess requests in a worker pool and yield batched results.

        Items come back in completion order, not request order. A bounded
        semaphore stops the preprocessors from running more than 256 items
        ahead of inference.
        """
        limiter = mp.Semaphore(256)  # cap in-flight preprocessed items
        pending = []

        ctx = mp.get_context('spawn')
        with ctx.Pool(min(16, mp.cpu_count()),
                      initializer=set_test_transforms,
                      initargs=(self._test_transforms,)) as pool:
            preprocessed = pool.imap_unordered(
                preprocess, bounded_iter(requests, limiter))

            for item in preprocessed:
                limiter.release()  # free one slot per consumed item
                pending.append(item)
                if len(pending) == self._batch_size:
                    yield from self._process_batch(pending)
                    pending = []

        # Flush whatever remains once the pool has shut down.
        if pending:
            yield from self._process_batch(pending)