class SourceImageStorage:
    """Persist source images and look up their metadata.

    The encoded image bytes are handed to an ``ImageService``; a document
    built by ``create_doc`` from the metadata and the id returned by the
    image service is indexed through an ``ElasticSearchDriver``.
    """

    def __init__(self, flush_data=False):
        """Build the image service and the Elasticsearch driver.

        Args:
            flush_data: Passed through unchanged to ``ElasticSearchDriver``.
        """
        log = logging.getLogger(__name__)
        log.info("Init SourceImageStorage")

        storage_params = StorageParameters(**config.FILE_SERVICE_PARAMETERS)
        self._storage_service = ImageService(storage_params=storage_params)
        self._es = ElasticSearchDriver(
            index=config.ELASTIC_SOURCE_IMAGES_INDEX,
            doc_type=config.ELASTIC_SOURCE_IMAGES_TYPE,
            flush_data=flush_data,
        )

    def save_source_image(self, image: bytes,
                          metadata: SourceImageMetadata) -> str:
        """Store ``image`` and index a document describing it.

        Args:
            image: Encoded image bytes, given to ``put_encoded``.
            metadata: Metadata combined with the storage id by ``create_doc``.

        Returns:
            Whatever the driver's ``index`` call returns for the new document.
        """
        log = logging.getLogger(__name__)
        log.info("Saving source image with metadata {}".format(metadata))

        stored_image_id = self._storage_service.put_encoded(image)
        document = create_doc(metadata, stored_image_id)
        return self._es.index(document)

    def get_metadata_by_id(self, image_id: str) -> SourceImageMetadata:
        """Fetch the indexed document for ``image_id`` and wrap it as metadata.

        Args:
            image_id: Id passed to the driver's ``get_doc``.

        Returns:
            A ``SourceImageMetadata`` whose ``path`` is the document's
            ``DOC_FIELD_IMAGE_URL`` entry.
        """
        log = logging.getLogger(__name__)
        log.info("loading image with id {}".format(image_id))

        document = self._es.get_doc(image_id)
        return SourceImageMetadata(path=document[DOC_FIELD_IMAGE_URL])
def test_put_and_get(self, unique_temp_index):
    """A document indexed through the driver compares equal when read back."""
    es_driver = ElasticSearchDriver(unique_temp_index, "some-doc-type")
    stored_id = es_driver.index(self.doc1)
    assert es_driver.get_doc(stored_id) == self.doc1
class RegionRepository:
    """Index image regions in Elasticsearch and search them by descriptor.

    Search terms for a descriptor come from a ``SearchTermsCreator`` built
    with the descriptor shape given at construction time.
    """

    def __init__(self, descriptor_shape: Iterable[int], flush_data=False):
        """Create the Elasticsearch driver and the search-terms creator.

        Args:
            descriptor_shape: Passed to ``SearchTermsCreator``.
            flush_data: Passed through unchanged to ``ElasticSearchDriver``.
        """
        self._es = ElasticSearchDriver(
            index=config.ELASTIC_DESCRIPTOR_INDEX,
            doc_type=config.ELASTIC_DESCRIPTOR_TYPE,
            flush_data=flush_data,
        )
        self._search_terms_creator = SearchTermsCreator(descriptor_shape)

    def save(self, image_region: ImageRegion, reference_to_source: str) -> str:
        """Index a document for ``image_region``.

        Returns:
            Whatever the driver's ``index`` call returns for the new document.
        """
        document = self._create_doc(image_region, reference_to_source)
        return self._es.index(document)

    def find(self, descriptor: Descriptor) -> List[SearchResult]:
        """Search using the words derived from ``descriptor``.

        The word dictionary is passed to ``search_by_words`` together with
        the list of its keys.
        """
        search_terms = self._get_words(descriptor)
        term_fields = list(search_terms.keys())
        return self._es.search_by_words(search_terms, term_fields)

    def _create_doc(self, image_region: ImageRegion,
                    reference_to_source: str) -> Dict[str, object]:
        """Build the document: source reference, descriptor and word fields."""
        word_fields = self._search_terms_creator.get_dictionary_of_words(
            image_region.descriptor)
        fixed_fields = {
            SearchResult.FIELD_SOURCE_ID: reference_to_source,
            SearchResult.FIELD_DESCRIPTOR:
                image_region.descriptor.vector_as_lists,
        }
        # Keyword-style merge: a key present in both mappings raises
        # TypeError rather than being silently overwritten.
        return dict(**fixed_fields, **word_fields)

    def _get_words(self, descriptor: Descriptor) -> Dict[str, object]:
        """Return the search-terms creator's word dictionary for ``descriptor``."""
        creator = self._search_terms_creator
        return creator.get_dictionary_of_words(descriptor)
def test_search_by_words_works(self, unique_temp_index):
    """Two indexed documents sharing a word are both found by word search.

    The search is polled up to 10 times, one second apart, until it
    returns two results; the results are then compared (ordered by
    ``source_id``) with ``SearchResult`` objects built from the payloads.
    """
    driver = ElasticSearchDriver(unique_temp_index, "some-doc-type")
    driver.index(self.doc1)
    driver.index(self.doc2)

    def by_source_id(result):
        return result.source_id

    for _ in range(10):
        search_results = driver.search_by_words(
            {"word2": "value2shared"}, ["word1", "word2", "word3"])
        # Check before sleeping so a successful fetch does not cost an
        # extra second; only wait when another attempt is needed.
        if len(search_results) == 2:
            break
        time.sleep(1)
    else:
        # Explicit raise instead of ``assert False``, which is removed
        # when Python runs with -O.
        raise AssertionError("Unable to fetch results in a reasonable time ")

    expected = [SearchResult(self.payload1), SearchResult(self.payload2)]
    assert sorted(search_results, key=by_source_id) == \
        sorted(expected, key=by_source_id)
def test_index_with_flush_works(self):
    """Check ``index`` on a driver built with ``True`` as its fourth argument.

    The driver must return ``mocked_id`` and the mocked client's ``index``
    must have been called with ``refresh='wait_for'``.
    """
    # noinspection PyTypeChecker
    flushing_driver = ElasticSearchDriver(
        index, doc_type, self.mocked_elastic, True)
    self.mocked_elastic.index.return_value = mocked_index_response

    returned_id = flushing_driver.index(doc)

    assert returned_id is mocked_id
    expected_calls = [
        call(index=index, doc_type=doc_type, body=doc, refresh='wait_for'),
    ]
    self.mocked_elastic.index.assert_has_calls(expected_calls)