def search_by_image(image_content: bytes) -> List[Tuple[float, model.Post]]:
    """Find stored posts whose image signature is close to the given image.

    A signature and its word list are generated from ``image_content``.
    The database is asked for the (at most 100) ``post_signature`` rows
    sharing the most words with the query, and the candidates are then
    re-ranked by normalized signature distance.

    :param image_content: raw bytes of the image to look up.
    :return: ``(distance, post)`` pairs for every candidate whose distance
        is below ``image_hash.DISTANCE_CUTOFF``, in the order the database
        returned the candidates; an empty list when no row matches. The
        ``post`` element is whatever ``try_get_post_by_id`` returns for the
        candidate's id.
    """
    query_signature = image_hash.generate_signature(image_content)
    query_words = image_hash.generate_words(query_signature)

    # unnest(s.words, :q) expands the stored 'words' array and the query
    # array side by side: one output row per array position, holding the
    # stored word and the query word at that position. Counting the rows
    # where both are equal gives the number of matching words, which is the
    # candidate's score.
    #
    # The multi-array form of unnest requires PostgreSQL 9.4 or newer:
    # https://www.postgresql.org/docs/current/functions-array.html
    dbquery = (
        " SELECT s.post_id, s.signature, count(a.query) AS score"
        " FROM post_signature AS s, unnest(s.words, :q) AS a(word, query)"
        " WHERE a.word = a.query"
        " GROUP BY s.post_id"
        " ORDER BY score DESC LIMIT 100; "
    )

    rows = db.session.execute(dbquery, {"q": query_words})

    # The score column only drives ordering/limiting inside the query; it is
    # not needed once the rows have been fetched.
    matches = [
        (post_id, image_hash.unpack_signature(packed_signature))
        for post_id, packed_signature, _score in rows
    ]
    if not matches:
        return []

    # Split the pairs into two parallel tuples so that all distances can be
    # computed with a single call.
    candidate_post_ids, signatures = zip(*matches)
    distances = image_hash.normalized_distance(signatures, query_signature)

    return [
        (distance, try_get_post_by_id(candidate_post_id))
        for candidate_post_id, distance in zip(candidate_post_ids, distances)
        if distance < image_hash.DISTANCE_CUTOFF
    ]
def test_signature_functions(read_asset, config_injector):
    """Check the image_hash helpers on the JPEG asset pair.

    Covers signature generation, the pack/unpack round trip, the normalized
    distance between the two signatures and the number of words they share.
    The numeric expectations are fixed reference values (presumably recorded
    from a known-good run -- confirm before changing them).
    """
    # NOTE(review): config_injector is requested but never called here;
    # presumably the fixture is needed for its setup side effects -- confirm.
    base_sig = image_hash.generate_signature(read_asset('jpeg.jpg'))
    similar_sig = image_hash.generate_signature(
        read_asset('jpeg-similar.jpg'))

    # Packing and then unpacking must give back an equal signature.
    packed = image_hash.pack_signature(base_sig)
    base_roundtrip = image_hash.unpack_signature(packed)
    packed = image_hash.pack_signature(similar_sig)
    similar_roundtrip = image_hash.unpack_signature(packed)
    assert array_equal(base_sig, base_roundtrip)
    assert array_equal(similar_sig, similar_roundtrip)

    # Distance between the two different images.
    cross_distances = image_hash.normalized_distance([base_sig], similar_sig)
    assert abs(cross_distances[0] - 0.20599895341812172) < 1e-8

    # Distance of a signature to itself is (numerically) zero.
    self_distances = image_hash.normalized_distance(
        [similar_sig], similar_sig)
    assert abs(self_distances[0]) < 1e-8

    # Count the positions at which both word lists hold the same word.
    base_words = image_hash.generate_words(base_sig)
    similar_words = image_hash.generate_words(similar_sig)
    shared_word_count = sum(
        left == right for left, right in zip(base_words, similar_words))
    assert shared_word_count == 17
def test_signature_avif(read_asset, config_injector):
    """Check the image_hash helpers on the AVIF asset pair.

    Covers signature generation, the pack/unpack round trip, the normalized
    distance between the two signatures and the number of words they share.
    The numeric expectations are fixed reference values (presumably recorded
    from a known-good run -- confirm before changing them).
    """
    # NOTE(review): config_injector is requested but never called here;
    # presumably the fixture is needed for its setup side effects -- confirm.
    base_sig = image_hash.generate_signature(read_asset("avif.avif"))
    similar_sig = image_hash.generate_signature(
        read_asset("avif-similar.avif"))

    # Packing and then unpacking must give back an equal signature.
    packed = image_hash.pack_signature(base_sig)
    base_roundtrip = image_hash.unpack_signature(packed)
    packed = image_hash.pack_signature(similar_sig)
    similar_roundtrip = image_hash.unpack_signature(packed)
    assert array_equal(base_sig, base_roundtrip)
    assert array_equal(similar_sig, similar_roundtrip)

    # Distance between the two different images.
    cross_distances = image_hash.normalized_distance([base_sig], similar_sig)
    assert abs(cross_distances[0] - 0.22628712858355998) < 1e-8

    # Distance of a signature to itself is (numerically) zero.
    self_distances = image_hash.normalized_distance(
        [similar_sig], similar_sig)
    assert abs(self_distances[0]) < 1e-8

    # Count the positions at which both word lists hold the same word.
    base_words = image_hash.generate_words(base_sig)
    similar_words = image_hash.generate_words(similar_sig)
    shared_word_count = sum(
        left == right for left, right in zip(base_words, similar_words))
    assert shared_word_count == 12