Example #1
0
    async def _send_to_server(
            self,
            image: str,
            n_repeats: int,
            client: httpx.Client,
            proc_callback: Callable[[bool],
                                    None] = None) -> Optional[List[float]]:
        """
        Embed a single image, using the local cache when possible and the
        server otherwise.

        The coroutine first awaits ``__wait_until_released()``, then loads
        the image bytes, looks the image up in the cache by its MD5 key and
        only posts it to the server on a cache miss. Embeddings received
        from the server are added to the cache.

        Parameters
        ----------
        image
            Single image path.
        n_repeats
            The index of retry. It is zero when batch is sent to server
            for the first time. In case when first trial was not successful
            we will send images again.
        client
            HTTPX client that communicates with the server
        proc_callback
            Optional function called with ``True``/``False`` once the image
            has been resolved from the cache or the server (``True`` when an
            embedding is available). It is not called when the image cannot
            be loaded.

        Returns
        -------
        emb
            Embedding. For images that are not successfully embedded returns
            None.

        Raises
        ------
        EmbeddingCancelledException
            If ``self.cancelled`` is set when the coroutine gets to run.
        """
        await self.__wait_until_released()

        if self.cancelled:
            raise EmbeddingCancelledException()

        self.num_parallel_requests += 1
        try:
            # load image
            im = self._image_loader.load_image_bytes(image, self._im_size)
            if im is None:
                return None

            # if image in cache return it
            cache_key = self._cache.md5_hash(im)
            emb = self._cache.get_cached_result_or_none(cache_key)

            if emb is None:
                # gather responses
                url = f"/image/{self._model}?machine={self.machine_id}" \
                      f"&session={self.session_id}&retry={n_repeats}"
                emb = await self._send_request(client, im, url)
                if emb is not None:
                    self._cache.add(cache_key, emb)
            if proc_callback:
                proc_callback(emb is not None)

            return emb
        finally:
            # Release the slot on every exit path (normal return, exception
            # from the request/cache/callback, or task cancellation) so the
            # in-flight counter cannot drift upwards after a failure.
            self.num_parallel_requests -= 1
Example #2
0
    def _get_responses_from_server(self, http_streams, cache_keys,
                                   image_processed_callback):
        """Wait for responses from an http2 server in a blocking manner."""
        embeddings = []

        for stream_id, cache_key in zip(http_streams, cache_keys):
            if self.cancelled:
                raise EmbeddingCancelledException()

            if not stream_id and not cache_key:
                # when image cannot be loaded
                embeddings.append(self.CANNOT_LOAD)

                if image_processed_callback:
                    image_processed_callback(success=False)
                continue

            if not stream_id:
                # skip rest of the waiting because image was either
                # skipped at loading or is present in the local cache
                embedding = self._cache.get_cached_result_or_none(cache_key)
                embeddings.append(embedding)

                if image_processed_callback:
                    image_processed_callback(success=embedding is not None)
                continue

            try:
                response = self._get_json_response_or_none(stream_id)
            except (ConnectionError, MaxNumberOfRequestsError):
                self._cache.persist_cache()
                self.reconnect_to_server()
                return embeddings

            if not response or 'embedding' not in response:
                # returned response is not a valid json response
                # or the embedding key not present in the json
                embeddings.append(None)
            else:
                # successful response
                embedding = np.array(response['embedding'], dtype=np.float16)
                embeddings.append(embedding)
                self._cache.add(cache_key, embedding)

            if image_processed_callback:
                image_processed_callback(embeddings[-1] is not None)

        return embeddings
Example #3
0
    def _send_to_server(self, file_paths, image_processed_callback, retry_n):
        """ Load images and compute cache keys and send requests to
        an http2 server for valid ones.
        """
        cache_keys = []
        http_streams = []

        for file_path in file_paths:
            if self.cancelled:
                raise EmbeddingCancelledException()

            image = self._image_loader.load_image_bytes(
                file_path, self._target_image_size)
            if not image:
                # skip the sending because image was skipped at loading
                http_streams.append(None)
                cache_keys.append(None)
                continue

            cache_key = self._cache.md5_hash(image)
            cache_keys.append(cache_key)
            if self._cache.exist_in_cache(cache_key):
                # skip the sending because image is present in the
                # local cache
                http_streams.append(None)
                continue

            try:
                headers = {
                    'Content-Type': 'image/jpeg',
                    'Content-Length': str(len(image))
                }
                stream_id = self._send_request(
                    method='POST',
                    url='/image/' + self._model +
                    '?machine={}&session={}&retry={}'.format(
                        self.machine_id, self.session_id, retry_n),
                    headers=headers,
                    body_bytes=image)
                http_streams.append(stream_id)
            except (ConnectionError, BrokenPipeError):
                self._cache.persist_cache()
                raise

        # wait for the responses in a blocking manner
        return self._get_responses_from_server(http_streams, cache_keys,
                                               image_processed_callback)
Example #4
0
    async def embedd_batch(
        self,
        file_paths: List[str],
        n_repeats: int,
        proc_callback: Callable[[bool], None] = None
    ) -> List[Optional[List[float]]]:
        """
        Function perform embedding of a batch of images.

        One ``_send_to_server`` coroutine is created per file path; all of
        them share a single HTTP client and are awaited together. After
        they finish the cache is persisted.

        Parameters
        ----------
        file_paths
            A list of file paths for images to be embedded.
        n_repeats
            The index of retry. It is zero when batch is sent to server
            for the first time. In case when first trial was not successful
            we will send images again.
        proc_callback
            Optional function forwarded to ``_send_to_server`` for every
            image; it receives a success flag.

        Returns
        -------
        embeddings
            Array-like of float arrays (embeddings) for successfully embedded
            images and Nones for skipped images.

        Raises
        ------
        EmbeddingCancelledException:
            If cancelled attribute is set to True (default=False).
        """
        requests = []
        # NOTE(review): ``async with`` needs an asynchronous client; in
        # current httpx releases that is ``httpx.AsyncClient`` - confirm
        # against the httpx version this project pins.
        async with httpx.Client(timeout=self.timeouts,
                                base_url=self.server_url) as client:
            for p in file_paths:
                if self.cancelled:
                    # The coroutines created so far were never scheduled;
                    # close them explicitly so they are not garbage
                    # collected as "never awaited".
                    for pending in requests:
                        pending.close()
                    raise EmbeddingCancelledException()
                requests.append(
                    self._send_to_server(p, n_repeats, client, proc_callback))

            embeddings = await asyncio.gather(*requests)
        self._cache.persist_cache()
        # every request is expected to have released its slot by now
        assert self.num_parallel_requests == 0

        return embeddings
Example #5
0
    def _embed(self, file_path):
        """ Load images and compute cache keys and send requests to
        an http2 server for valid ones.
        """
        if self.cancelled:
            raise EmbeddingCancelledException()

        image = self._image_loader.load_image_or_none(file_path,
                                                      self._target_image_size)
        if image is None:
            return None
        image = self._image_loader.preprocess_squeezenet(image)

        cache_key = self._cache.md5_hash(image)
        cached_im = self._cache.get_cached_result_or_none(cache_key)
        if cached_im is not None:
            return cached_im

        embedded_image = self.embedder.predict([image])
        embedded_image = embedded_image[0][0]

        self._cache.add(cache_key, embedded_image)
        return embedded_image