예제 #1
0
def test_encode_images_verbose_false(capsys):
    """Encoding a directory with ``verbose=False`` must leave stdout and stderr empty."""
    quiet_hasher = PHash(verbose=False)
    quiet_hasher.encode_images(image_dir=PATH_IMAGE_DIR)

    # readouterr() returns a result whose fields are named ``out`` and ``err``.
    captured = capsys.readouterr()

    assert captured.err == ''
    assert captured.out == ''
예제 #2
0
def test_find_duplicates_to_remove_dir():
    """Removal list computed from the image directory names one file of the duplicate pair.

    Either member of the pair is accepted as the file to remove.
    """
    acceptable_outcomes = (['ukbench00120.jpg'], ['ukbench00120_resize.jpg'])

    files_to_remove = PHash().find_duplicates_to_remove(
        image_dir=PATH_IMAGE_DIR,
        max_distance_threshold=10,
    )

    assert isinstance(files_to_remove, list)
    assert files_to_remove in acceptable_outcomes
예제 #3
0
def test__find_duplicates_dict_outfile_none(mocker):
    """Check ``_find_duplicates_dict`` when ``outfile`` is ``None``.

    ``HashEval`` and ``save_json`` are patched in ``imagededup.methods.hashing``.
    The test then verifies the keyword arguments ``HashEval`` was last called
    with, that ``retrieve_results`` was called exactly once with the requested
    ``scores`` flag, and that ``save_json`` was never called.
    """
    hashes = {'1.jpg': '123456'}
    max_distance = 10
    with_scores = True
    be_verbose = False

    hasher_under_test = PHash(verbose=be_verbose)
    eval_cls_mock = mocker.patch('imagededup.methods.hashing.HashEval')
    json_writer_mock = mocker.patch('imagededup.methods.hashing.save_json')

    hasher_under_test._find_duplicates_dict(
        encoding_map=hashes,
        max_distance_threshold=max_distance,
        scores=with_scores,
        outfile=None,
    )

    # The same encoding map is expected as both the test set and the query set.
    expected_eval_kwargs = {
        'test': hashes,
        'queries': hashes,
        'distance_function': Hashing.hamming_distance,
        'verbose': be_verbose,
        'threshold': max_distance,
        'search_method': 'brute_force_cython',
    }
    eval_cls_mock.assert_called_with(**expected_eval_kwargs)

    retrieve_mock = eval_cls_mock.return_value.retrieve_results
    retrieve_mock.assert_called_once_with(scores=with_scores)

    json_writer_mock.assert_not_called()
예제 #4
0
def test_find_duplicates_correctness():
    """Duplicates found in the image directory map each file name to a list of names.

    'ukbench09268.jpg' must have no duplicates and 'ukbench00120.jpg' must map
    to exactly its resized copy.
    """
    found = PHash().find_duplicates(
        image_dir=PATH_IMAGE_DIR,
        max_distance_threshold=10,
    )
    assert isinstance(found, dict)

    all_matches = list(found.values())
    assert isinstance(all_matches[0], list)

    unique_image_matches = found['ukbench09268.jpg']
    assert len(unique_image_matches) == 0

    assert found['ukbench00120.jpg'] == ['ukbench00120_resize.jpg']
예제 #5
0
def test_find_duplicates_verbose_false(capsys):
    """Finding duplicates with ``verbose=False`` must leave stdout and stderr empty."""
    call_options = {
        'image_dir': PATH_IMAGE_DIR,
        'max_distance_threshold': 10,
        'scores': False,
        'outfile': False,
    }
    PHash(verbose=False).find_duplicates(**call_options)

    captured = capsys.readouterr()

    assert captured.out == ''
    assert captured.err == ''
예제 #6
0
def test_find_duplicates_correctness_score():
    """With ``scores=True`` each duplicate is reported as a ``(file name, score)`` tuple.

    The resized copy of 'ukbench00120.jpg' is expected with a score of 0, and
    'ukbench09268.jpg' is expected to have no duplicates.
    """
    hasher = PHash()
    scored = hasher.find_duplicates(
        image_dir=PATH_IMAGE_DIR, max_distance_threshold=10, scores=True
    )

    assert isinstance(scored, dict)
    assert isinstance(list(scored.values())[0], list)

    expected_pair = ('ukbench00120_resize.jpg', 0)
    assert scored['ukbench09268.jpg'] == []
    assert scored['ukbench00120.jpg'] == [expected_pair]
예제 #7
0
def test_find_duplicates_to_remove_encoding():
    """Removal list computed from a precomputed encoding map names one of the duplicate pair.

    The original image and its resized copy carry the same hash in the map;
    either one is accepted as the file to remove.
    """
    shared_hash = '9fee256239984d71'
    precomputed_hashes = {
        'ukbench00120_resize.jpg': shared_hash,
        'ukbench00120_rotation.jpg': '850d513c4fdcbb72',
        'ukbench00120.jpg': shared_hash,
        'ukbench00120_hflip.jpg': 'cabb7237e8cd3824',
        'ukbench09268.jpg': 'c73c36c2da2f29c9',
    }
    acceptable_outcomes = (['ukbench00120.jpg'], ['ukbench00120_resize.jpg'])

    files_to_remove = PHash().find_duplicates_to_remove(
        encoding_map=precomputed_hashes,
        max_distance_threshold=10,
    )

    assert isinstance(files_to_remove, list)
    assert files_to_remove in acceptable_outcomes
예제 #8
0
def test_find_duplicates_encoding_map_input():
    """``find_duplicates`` accepts a precomputed encoding map instead of an image directory.

    'ukbench09268.jpg' must have no duplicates and 'ukbench00120.jpg' must map
    to exactly its resized copy, which carries the same hash in the map.
    """
    file_hash_pairs = [
        ('ukbench00120_resize.jpg', '9fee256239984d71'),
        ('ukbench00120_rotation.jpg', '850d513c4fdcbb72'),
        ('ukbench00120.jpg', '9fee256239984d71'),
        ('ukbench00120_hflip.jpg', 'cabb7237e8cd3824'),
        ('ukbench09268.jpg', 'c73c36c2da2f29c9'),
    ]
    precomputed_hashes = dict(file_hash_pairs)

    found = PHash().find_duplicates(
        encoding_map=precomputed_hashes,
        max_distance_threshold=10,
    )

    assert isinstance(found, dict)
    all_matches = list(found.values())
    assert isinstance(all_matches[0], list)
    assert len(found['ukbench09268.jpg']) == 0
    assert found['ukbench00120.jpg'] == ['ukbench00120_resize.jpg']
예제 #9
0
def test_find_duplicates_clearing():
    """Running ``find_duplicates`` twice on one hasher still gives the expected result.

    Only the result of the second run is inspected; it must not contain
    anything beyond the single expected ``(file name, score)`` entry.
    """
    hasher = PHash()
    search_options = {
        'image_dir': PATH_IMAGE_DIR,
        'max_distance_threshold': 10,
        'scores': True,
        'search_method': 'brute_force_cython',
    }

    # Two identical back-to-back runs; the second result overwrites the first.
    for _ in range(2):
        second_run = hasher.find_duplicates(**search_options)

    assert isinstance(second_run, dict)
    assert isinstance(list(second_run.values())[0], list)
    assert second_run['ukbench09268.jpg'] == []

    expected_pair = ('ukbench00120_resize.jpg', 0)
    assert second_run['ukbench00120.jpg'] == [expected_pair]
예제 #10
0
    hasher.find_duplicates_to_remove(encoding_map=encoding_map,
                                     max_distance_threshold=threshold,
                                     outfile=outfile)
    find_duplicates_mocker.assert_called_once_with(
        encoding_map=encoding_map,
        image_dir=None,
        max_distance_threshold=threshold,
        scores=False,
    )
    get_files_to_remove_mocker.assert_called_once_with(ret_val_find_dup_dict)
    save_json_mocker.assert_not_called()


# Integration tests

# One instance of each hashing method, created once at module level.
phasher, dhasher, ahasher, whasher = PHash(), DHash(), AHash(), WHash()

# The bound ``encode_image`` method of every hasher, in the order
# PHash, DHash, AHash, WHash; used as the parametrize argument list below.
common_test_parameters = [
    hasher.encode_image for hasher in (phasher, dhasher, ahasher, whasher)
]


@pytest.mark.parametrize('hash_function', common_test_parameters)
class TestCommon:
    def test_len_hash(self, hash_function):