def test_deduplicate():
    """Check that tools.deduplicate pairs an image with a byte-for-byte copy.

    The first default test image is copied into a temporary directory, and
    deduplication is run over the first two default test images plus the
    copy using PHash (hash_size=16) with a threshold of 0.25. Exactly one
    pair is expected: the original and its copy, in either order.
    """
    original = testing.DEFAULT_TEST_IMAGES[0]
    # Use the context manager so the temporary directory is removed
    # deterministically, even when deduplication raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        duplicate = os.path.join(tmp_dir, 'image1.jpg')
        shutil.copy(original, duplicate)
        pairs = tools.deduplicate(
            files=[
                original,
                testing.DEFAULT_TEST_IMAGES[1],
                duplicate,
            ],
            hashers=[(hashers.PHash(hash_size=16), 0.25)],
        )
    assert len(pairs) == 1
    file1, file2 = pairs[0]
    # The order of the two files inside the pair is not asserted.
    assert (file1, file2) in ((duplicate, original), (original, duplicate))
def test_deduplicate_u8():
    """Check that tools.deduplicate works with the PHashU8 hasher.

    This test verifies that extensions.compute_euclidean_pairwise_duplicates
    works properly.

    The first default test image is copied into a temporary directory, and
    deduplication is run over the first two default test images plus the
    copy using PHashU8 (hash_size=16) with a threshold of 10. Exactly one
    pair is expected: the original and its copy, in either order.
    """
    original = testing.DEFAULT_TEST_IMAGES[0]
    # Use the context manager so the temporary directory is removed
    # deterministically, even when deduplication raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        duplicate = os.path.join(tmp_dir, 'image1.jpg')
        shutil.copy(original, duplicate)
        pairs = tools.deduplicate(
            files=[
                original,
                testing.DEFAULT_TEST_IMAGES[1],
                duplicate,
            ],
            hashers=[(hashers.PHashU8(hash_size=16), 10)],
        )
    assert len(pairs) == 1
    file1, file2 = pairs[0]
    # The order of the two files inside the pair is not asserted.
    assert (file1, file2) in ((duplicate, original), (original, duplicate))
def is_img_suspicious(img):
    """Run PHash deduplication over ``filepaths`` and print the result.

    Calls tools.deduplicate with a PHash hasher (hash_size=16) and a
    threshold of 0.2, then prints whatever it returns. Nothing is returned.

    NOTE(review): ``img`` is not used in the body, and ``filepaths`` is not
    defined inside this function -- presumably a module-level name; confirm
    against the rest of the file.
    """
    hasher_and_threshold = (hashers.PHash(hash_size=16), 0.2)
    matches = tools.deduplicate(
        files=filepaths,
        hashers=[hasher_and_threshold],
    )
    print(matches)