def test_encode_images_verbose_false(capsys):
    phasher = PHash(verbose=False)
    phasher.encode_images(image_dir=PATH_IMAGE_DIR)
    out, err = capsys.readouterr()

    assert '' == err
    assert '' == out

def test_find_duplicates_to_remove_dir():
    phasher = PHash()
    removal_list = phasher.find_duplicates_to_remove(
        image_dir=PATH_IMAGE_DIR, max_distance_threshold=10
    )
    assert isinstance(removal_list, list)
    assert (
        removal_list == ['ukbench00120.jpg']
        or removal_list == ['ukbench00120_resize.jpg']
    )

def test__find_duplicates_dict_outfile_none(mocker):
    encoding_map = {'1.jpg': '123456'}
    threshold = 10
    scores = True
    outfile = None
    verbose = False
    myhasher = PHash(verbose=verbose)
    hasheval_mocker = mocker.patch('imagededup.methods.hashing.HashEval')
    save_json_mocker = mocker.patch('imagededup.methods.hashing.save_json')

    myhasher._find_duplicates_dict(
        encoding_map=encoding_map,
        max_distance_threshold=threshold,
        scores=scores,
        outfile=outfile,
    )
    hasheval_mocker.assert_called_with(
        test=encoding_map,
        queries=encoding_map,
        distance_function=Hashing.hamming_distance,
        verbose=verbose,
        threshold=threshold,
        search_method='brute_force_cython',
    )
    hasheval_mocker.return_value.retrieve_results.assert_called_once_with(scores=scores)
    save_json_mocker.assert_not_called()

def test_find_duplicates_correctness():
    phasher = PHash()
    duplicate_dict = phasher.find_duplicates(
        image_dir=PATH_IMAGE_DIR, max_distance_threshold=10
    )
    assert isinstance(duplicate_dict, dict)
    assert isinstance(list(duplicate_dict.values())[0], list)
    assert len(duplicate_dict['ukbench09268.jpg']) == 0
    assert duplicate_dict['ukbench00120.jpg'] == ['ukbench00120_resize.jpg']

def test_find_duplicates_verbose_false(capsys):
    phasher = PHash(verbose=False)
    phasher.find_duplicates(
        image_dir=PATH_IMAGE_DIR,
        max_distance_threshold=10,
        scores=False,
        outfile=False,
    )
    out, err = capsys.readouterr()

    assert '' == out
    assert '' == err

def test_find_duplicates_correctness_score():
    phasher = PHash()
    duplicate_dict = phasher.find_duplicates(
        image_dir=PATH_IMAGE_DIR, max_distance_threshold=10, scores=True
    )
    assert isinstance(duplicate_dict, dict)
    duplicates = list(duplicate_dict.values())
    assert isinstance(duplicates[0], list)
    assert duplicate_dict['ukbench09268.jpg'] == []
    assert duplicate_dict['ukbench00120.jpg'] == [('ukbench00120_resize.jpg', 0)]

def test_find_duplicates_to_remove_encoding():
    encoding = {
        'ukbench00120_resize.jpg': '9fee256239984d71',
        'ukbench00120_rotation.jpg': '850d513c4fdcbb72',
        'ukbench00120.jpg': '9fee256239984d71',
        'ukbench00120_hflip.jpg': 'cabb7237e8cd3824',
        'ukbench09268.jpg': 'c73c36c2da2f29c9',
    }
    phasher = PHash()
    removal_list = phasher.find_duplicates_to_remove(
        encoding_map=encoding, max_distance_threshold=10
    )
    assert isinstance(removal_list, list)
    assert (
        removal_list == ['ukbench00120.jpg']
        or removal_list == ['ukbench00120_resize.jpg']
    )

def test_find_duplicates_encoding_map_input():
    encoding = {
        'ukbench00120_resize.jpg': '9fee256239984d71',
        'ukbench00120_rotation.jpg': '850d513c4fdcbb72',
        'ukbench00120.jpg': '9fee256239984d71',
        'ukbench00120_hflip.jpg': 'cabb7237e8cd3824',
        'ukbench09268.jpg': 'c73c36c2da2f29c9',
    }
    phasher = PHash()
    duplicate_dict = phasher.find_duplicates(
        encoding_map=encoding, max_distance_threshold=10
    )
    assert isinstance(duplicate_dict, dict)
    assert isinstance(list(duplicate_dict.values())[0], list)
    assert len(duplicate_dict['ukbench09268.jpg']) == 0
    assert duplicate_dict['ukbench00120.jpg'] == ['ukbench00120_resize.jpg']

def test_find_duplicates_clearing():
    phasher = PHash()
    duplicate_dict = phasher.find_duplicates(
        image_dir=PATH_IMAGE_DIR,
        max_distance_threshold=10,
        scores=True,
        search_method='brute_force_cython',
    )
    # Call a second time to check that results are cleared between runs
    duplicate_dict = phasher.find_duplicates(
        image_dir=PATH_IMAGE_DIR,
        max_distance_threshold=10,
        scores=True,
        search_method='brute_force_cython',
    )
    assert isinstance(duplicate_dict, dict)
    duplicates = list(duplicate_dict.values())
    assert isinstance(duplicates[0], list)
    assert duplicate_dict['ukbench09268.jpg'] == []
    assert duplicate_dict['ukbench00120.jpg'] == [('ukbench00120_resize.jpg', 0)]

    hasher.find_duplicates_to_remove(
        encoding_map=encoding_map, max_distance_threshold=threshold, outfile=outfile
    )
    find_duplicates_mocker.assert_called_once_with(
        encoding_map=encoding_map,
        image_dir=None,
        max_distance_threshold=threshold,
        scores=False,
    )
    get_files_to_remove_mocker.assert_called_once_with(ret_val_find_dup_dict)
    save_json_mocker.assert_not_called()


# Integration tests
phasher = PHash()
dhasher = DHash()
ahasher = AHash()
whasher = WHash()

common_test_parameters = [
    phasher.encode_image,
    dhasher.encode_image,
    ahasher.encode_image,
    whasher.encode_image,
]


@pytest.mark.parametrize('hash_function', common_test_parameters)
class TestCommon:
    def test_len_hash(self, hash_function):