Example #1
0
def test_scene_detection_batches():
    """Check that compute_batches yields the same hashes as compute and
    that every frame of the video is accounted for."""
    hasher = hashers.SimpleSceneDetection(base_hasher=hashers.TMKL1(
        frames_per_second=30,
        frame_hasher=hashers.PHashU8(),
        norm=None,
        distance_metric='euclidean'),
                                          max_scene_length=10)
    hashes_v2s = hasher.compute('perception/testing/videos/v2s.mov',
                                errors='raise')

    batch_hashes = []
    total_frames = 0
    batches = hasher.compute_batches('perception/testing/videos/v2s.mov',
                                     batch_size=1)
    for batch in batches:
        for scene_hash, scene_frames in batch:
            batch_hashes.append(scene_hash)
            total_frames += len(scene_frames)

    # compute and compute_batches must agree on the hashes produced.
    assert len(batch_hashes) == len(hashes_v2s)
    assert all(a == b for a, b in zip(batch_hashes, hashes_v2s))

    # Count the frames the video decoder yields at the same frame rate.
    expected_frame_count = sum(
        1 for _ in hashers.tools.read_video(
            'perception/testing/videos/v2s.mov', frames_per_second=30))

    # Every decoded frame must have been assigned to some scene.
    assert expected_frame_count == total_frames
Example #2
0
def test_scene_detection():
    """Check scene counts for single-scene and multi-scene test videos."""
    base = hashers.TMKL1(frames_per_second=30,
                         frame_hasher=hashers.PHashU8(),
                         norm=None,
                         distance_metric='euclidean')
    hasher = hashers.SimpleSceneDetection(base_hasher=base,
                                          max_scene_length=10)

    # Each of these videos contains exactly one scene.
    for filepath in ('perception/testing/videos/v1.m4v',
                     'perception/testing/videos/v2.m4v'):
        assert len(hasher.compute(filepath, errors='raise')) == 1

    # v2s.mov is expected to split into two scenes.
    hashes_v2s = hasher.compute('perception/testing/videos/v2s.mov',
                                errors='raise')
    assert len(hashes_v2s) == 2
Example #3
0
def test_deduplicate_u8():
    """Verify that tools.deduplicate (backed by
    extensions.compute_euclidean_pairwise_duplicates) flags an exact file
    copy as a duplicate pair when using a uint8 hasher."""
    # Use the context-manager form so the temporary directory is removed
    # deterministically at the end of the test instead of relying on the
    # TemporaryDirectory finalizer running at garbage collection.
    with tempfile.TemporaryDirectory() as tempdir:
        original = testing.DEFAULT_TEST_IMAGES[0]
        duplicate = os.path.join(tempdir, 'image1.jpg')
        shutil.copy(original, duplicate)
        pairs = tools.deduplicate(
            files=[
                testing.DEFAULT_TEST_IMAGES[0],
                testing.DEFAULT_TEST_IMAGES[1],
                duplicate,
            ],
            hashers=[(hashers.PHashU8(hash_size=16), 10)])
        # Only the copy/original pair should be reported, in either order.
        assert len(pairs) == 1
        file1, file2 = pairs[0]
        assert {file1, file2} == {original, duplicate}
Example #4
0
def test_deduplicate_hashes_multiple():
    """Verify that tools.deduplicate_hashes reports a duplicate pair only
    once even when a file contributes more than one (identical) hash."""
    hasher = hashers.PHashU8(hash_size=16)
    # Use the context-manager form so the temporary directory is removed
    # deterministically at the end of the test instead of relying on the
    # TemporaryDirectory finalizer running at garbage collection.
    with tempfile.TemporaryDirectory() as tempdir:
        original = testing.DEFAULT_TEST_IMAGES[0]
        duplicate = os.path.join(tempdir, 'image1.jpg')
        shutil.copy(original, duplicate)
        # File 1 deliberately appears three times with the same hash; the
        # (0, 1) pair must still be reported exactly once.
        hashes = [
            (0, hasher.compute(original)),
            (1, hasher.compute(duplicate)),
            (1, hasher.compute(duplicate)),
            (1, hasher.compute(duplicate)),
            (2, hasher.compute(testing.DEFAULT_TEST_IMAGES[1])),
        ]
        pairs = tools.deduplicate_hashes(hashes=hashes,
                                         threshold=10,
                                         hash_format='base64',
                                         hash_length=hasher.hash_length,
                                         distance_metric='euclidean',
                                         hash_dtype='uint8')
        assert len(pairs) == 1
        file1, file2 = pairs[0]
        # Pair order is unspecified; accept either orientation.
        assert {file1, file2} == {0, 1}