import os
import shutil
import tempfile

from perception import hashers, testing, tools


def test_scene_detection_batches():
    hasher = hashers.SimpleSceneDetection(
        base_hasher=hashers.TMKL1(
            frames_per_second=30,
            frame_hasher=hashers.PHashU8(),
            norm=None,
            distance_metric='euclidean'),
        max_scene_length=10)
    hashes_v2s = hasher.compute(
        'perception/testing/videos/v2s.mov', errors='raise')
    hashes_batches = []
    frame_count = 0
    for batch in hasher.compute_batches(
            'perception/testing/videos/v2s.mov', batch_size=1):
        for hash_string, frames in batch:
            hashes_batches.append(hash_string)
            frame_count += len(frames)

    # Ensure we get the same hashes whether using compute or compute_batches.
    assert len(hashes_batches) == len(hashes_v2s)
    assert all(h1 == h2 for h1, h2 in zip(hashes_batches, hashes_v2s))

    expected_frame_count = 0
    for _, _, _ in hashers.tools.read_video(
            'perception/testing/videos/v2s.mov', frames_per_second=30):
        expected_frame_count += 1

    # Ensure all frames were accounted for in scene detection.
    assert expected_frame_count == frame_count


def test_scene_detection():
    hasher = hashers.SimpleSceneDetection(
        base_hasher=hashers.TMKL1(
            frames_per_second=30,
            frame_hasher=hashers.PHashU8(),
            norm=None,
            distance_metric='euclidean'),
        max_scene_length=10)
    # Each detected scene yields one hash: v1 and v2 each contain a single
    # scene, while v2s contains two.
    assert len(
        hasher.compute('perception/testing/videos/v1.m4v',
                       errors='raise')) == 1
    assert len(
        hasher.compute('perception/testing/videos/v2.m4v',
                       errors='raise')) == 1
    hashes_v2s = hasher.compute(
        'perception/testing/videos/v2s.mov', errors='raise')
    assert len(hashes_v2s) == 2
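

# A sketch of an additional check, not part of the original suite: the
# compute/compute_batches equivalence verified above for batch_size=1
# should hold for other batch sizes as well. The batch_size values below
# are illustrative assumptions.
def test_scene_detection_batch_sizes():
    hasher = hashers.SimpleSceneDetection(
        base_hasher=hashers.TMKL1(
            frames_per_second=30,
            frame_hasher=hashers.PHashU8(),
            norm=None,
            distance_metric='euclidean'),
        max_scene_length=10)
    reference = hasher.compute(
        'perception/testing/videos/v2s.mov', errors='raise')
    for batch_size in [2, 5]:
        hashes = [
            hash_string
            for batch in hasher.compute_batches(
                'perception/testing/videos/v2s.mov', batch_size=batch_size)
            for hash_string, _ in batch
        ]
        assert hashes == reference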


def test_deduplicate_u8():
    # This test verifies that extensions.compute_euclidean_pairwise_duplicates
    # (exercised here through tools.deduplicate with uint8 hashes) works
    # properly.
    directory = tempfile.TemporaryDirectory()
    original = testing.DEFAULT_TEST_IMAGES[0]
    duplicate = os.path.join(directory.name, 'image1.jpg')
    shutil.copy(original, duplicate)
    pairs = tools.deduplicate(
        files=[
            testing.DEFAULT_TEST_IMAGES[0],
            testing.DEFAULT_TEST_IMAGES[1],
            duplicate,
        ],
        hashers=[(hashers.PHashU8(hash_size=16), 10)])
    # Only the copied file should be flagged as a duplicate of the original.
    assert len(pairs) == 1
    file1, file2 = pairs[0]
    assert ((file1 == duplicate) and (file2 == original)) or (
        (file1 == original) and (file2 == duplicate))
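

# A minimal companion sketch, not in the original suite: the assertion of
# exactly one pair above implies the two distinct test images fall outside
# the threshold, so hashing them alone should yield no pairs.
def test_deduplicate_u8_distinct_images():
    pairs = tools.deduplicate(
        files=[testing.DEFAULT_TEST_IMAGES[0], testing.DEFAULT_TEST_IMAGES[1]],
        hashers=[(hashers.PHashU8(hash_size=16), 10)])
    assert len(pairs) == 0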


def test_deduplicate_hashes_multiple():
    # This test verifies that deduplicate_hashes functions properly
    # when there is more than one hash for a file.
    directory = tempfile.TemporaryDirectory()
    original = testing.DEFAULT_TEST_IMAGES[0]
    duplicate = os.path.join(directory.name, 'image1.jpg')
    hasher = hashers.PHashU8(hash_size=16)
    shutil.copy(original, duplicate)
    hashes = [
        (0, hasher.compute(original)),
        (1, hasher.compute(duplicate)),
        (1, hasher.compute(duplicate)),
        (1, hasher.compute(duplicate)),
        (2, hasher.compute(testing.DEFAULT_TEST_IMAGES[1])),
    ]
    pairs = tools.deduplicate_hashes(
        hashes=hashes,
        threshold=10,
        hash_format='base64',
        hash_length=hasher.hash_length,
        distance_metric='euclidean',
        hash_dtype='uint8')
    assert len(pairs) == 1
    file1, file2 = pairs[0]
    assert ((file1 == 0) and (file2 == 1)) or ((file1 == 1) and (file2 == 0))
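

# A sketch, not in the original suite, probing threshold sensitivity in
# deduplicate_hashes: a 16x16 PHashU8 hash has 256 uint8 elements, so the
# maximum possible Euclidean distance is 16 * 255 = 4080; a threshold of
# 10,000 (an illustrative assumption) should therefore pair even the two
# dissimilar images.
def test_deduplicate_hashes_loose_threshold():
    hasher = hashers.PHashU8(hash_size=16)
    hashes = [
        (0, hasher.compute(testing.DEFAULT_TEST_IMAGES[0])),
        (1, hasher.compute(testing.DEFAULT_TEST_IMAGES[1])),
    ]
    pairs = tools.deduplicate_hashes(
        hashes=hashes,
        threshold=10_000,
        hash_format='base64',
        hash_length=hasher.hash_length,
        distance_metric='euclidean',
        hash_dtype='uint8')
    # With every distance inside the threshold, the single distinct id pair
    # should be reported.
    assert len(pairs) == 1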