async def _send_to_server( self, image: str, n_repeats: int, client: httpx.Client, proc_callback: Callable[[bool], None] = None) -> Optional[List[float]]: """ Function get list of images objects. It send them to server and retrieve responses. Parameters ---------- image Single image path. n_repeats The index of retry. It is zero when batch is sent to server for the first time. In case when first trial was not successful we will send images again. client HTTPX client that communicates with the server proc_callback A function that is called after each image is fully processed by either getting a successful response from the server, getting the result from cache or skipping the image. Returns ------- emb Embedding. For images that are not successfully embedded returns None. """ await self.__wait_until_released() if self.cancelled: raise EmbeddingCancelledException() self.num_parallel_requests += 1 # load image im = self._image_loader.load_image_bytes(image, self._im_size) if im is None: self.num_parallel_requests -= 1 return None # if image in cache return it cache_key = self._cache.md5_hash(im) emb = self._cache.get_cached_result_or_none(cache_key) if emb is None: # gather responses url = f"/image/{self._model}?machine={self.machine_id}" \ f"&session={self.session_id}&retry={n_repeats}" emb = await self._send_request(client, im, url) if emb is not None: self._cache.add(cache_key, emb) if proc_callback: proc_callback(emb is not None) self.num_parallel_requests -= 1 return emb
def _get_responses_from_server(self, http_streams, cache_keys, image_processed_callback): """Wait for responses from an http2 server in a blocking manner.""" embeddings = [] for stream_id, cache_key in zip(http_streams, cache_keys): if self.cancelled: raise EmbeddingCancelledException() if not stream_id and not cache_key: # when image cannot be loaded embeddings.append(self.CANNOT_LOAD) if image_processed_callback: image_processed_callback(success=False) continue if not stream_id: # skip rest of the waiting because image was either # skipped at loading or is present in the local cache embedding = self._cache.get_cached_result_or_none(cache_key) embeddings.append(embedding) if image_processed_callback: image_processed_callback(success=embedding is not None) continue try: response = self._get_json_response_or_none(stream_id) except (ConnectionError, MaxNumberOfRequestsError): self._cache.persist_cache() self.reconnect_to_server() return embeddings if not response or 'embedding' not in response: # returned response is not a valid json response # or the embedding key not present in the json embeddings.append(None) else: # successful response embedding = np.array(response['embedding'], dtype=np.float16) embeddings.append(embedding) self._cache.add(cache_key, embedding) if image_processed_callback: image_processed_callback(embeddings[-1] is not None) return embeddings
def _send_to_server(self, file_paths, image_processed_callback, retry_n): """ Load images and compute cache keys and send requests to an http2 server for valid ones. """ cache_keys = [] http_streams = [] for file_path in file_paths: if self.cancelled: raise EmbeddingCancelledException() image = self._image_loader.load_image_bytes( file_path, self._target_image_size) if not image: # skip the sending because image was skipped at loading http_streams.append(None) cache_keys.append(None) continue cache_key = self._cache.md5_hash(image) cache_keys.append(cache_key) if self._cache.exist_in_cache(cache_key): # skip the sending because image is present in the # local cache http_streams.append(None) continue try: headers = { 'Content-Type': 'image/jpeg', 'Content-Length': str(len(image)) } stream_id = self._send_request( method='POST', url='/image/' + self._model + '?machine={}&session={}&retry={}'.format( self.machine_id, self.session_id, retry_n), headers=headers, body_bytes=image) http_streams.append(stream_id) except (ConnectionError, BrokenPipeError): self._cache.persist_cache() raise # wait for the responses in a blocking manner return self._get_responses_from_server(http_streams, cache_keys, image_processed_callback)
async def embedd_batch(
        self, file_paths: List[str], n_repeats: int,
        proc_callback: Optional[Callable[[bool], None]] = None
) -> List[Optional[List[float]]]:
    """
    Perform embedding of a batch of images.

    Parameters
    ----------
    file_paths
        A list of file paths for images to be embedded.
    n_repeats
        The index of retry. It is zero when batch is sent to server for
        the first time. In case when first trial was not successful we
        will send images again.
    proc_callback
        A function that is called after each image is fully processed
        by either getting a successful response from the server,
        getting the result from cache or skipping the image.

    Returns
    -------
    embeddings
        Array-like of float arrays (embeddings) for successfully
        embedded images and Nones for skipped images.

    Raises
    ------
    EmbeddingCancelledException:
        If cancelled attribute is set to True (default=False).
    """
    requests = []
    try:
        # NOTE(review): recent httpx versions expose the async client
        # as httpx.AsyncClient; this relies on an httpx version where
        # httpx.Client supports `async with` - confirm the pinned
        # httpx version before upgrading.
        async with httpx.Client(timeout=self.timeouts,
                                base_url=self.server_url) as client:
            for p in file_paths:
                if self.cancelled:
                    raise EmbeddingCancelledException()
                requests.append(
                    self._send_to_server(p, n_repeats, client,
                                         proc_callback))
            embeddings = await asyncio.gather(*requests)
    finally:
        # persist even when a request fails, so already-computed
        # embeddings are reused on retry (matches the error handling
        # of the synchronous http2 pipeline, which persists the cache
        # on connection errors)
        self._cache.persist_cache()

    assert self.num_parallel_requests == 0
    return embeddings
def _embed(self, file_path): """ Load images and compute cache keys and send requests to an http2 server for valid ones. """ if self.cancelled: raise EmbeddingCancelledException() image = self._image_loader.load_image_or_none(file_path, self._target_image_size) if image is None: return None image = self._image_loader.preprocess_squeezenet(image) cache_key = self._cache.md5_hash(image) cached_im = self._cache.get_cached_result_or_none(cache_key) if cached_im is not None: return cached_im embedded_image = self.embedder.predict([image]) embedded_image = embedded_image[0][0] self._cache.add(cache_key, embedded_image) return embedded_image