def test_validate_arches(self):
    '''
    Hash every archive named in ``arches`` and compare against ``expect``.

    Each archive is looked up in the ``test_ptree_base`` directory next to
    this file. The ``'cont'`` entry is removed from every item's info
    mapping before comparing, so only the remaining metadata is checked.
    '''
    # The fixture directory does not change between archives, so resolve it once.
    cwd = os.path.dirname(os.path.realpath(__file__))

    got = {}
    for arch_name in arches:
        archPath = os.path.join(cwd, 'test_ptree_base', arch_name)
        arch = pArch.PhashArchive(archPath)

        archHashes = list(arch.iterHashes())
        for item in archHashes:
            del item[1]['cont']

        got[arch_name] = archHashes

    expect_keys = sorted(expect.keys())
    got_keys = sorted(got.keys())
    self.assertEqual(expect_keys, got_keys)

    for key in expect_keys:
        # Dump both sides only when this archive actually differs. The
        # previous guard compared against ``expect_keys`` (the list of
        # names), which is never equal, so it printed for every archive.
        if got[key] != expect[key]:
            print("Key:", key)
            pprint.pprint(got[key])
            pprint.pprint(expect[key])
        self.assertEqual(expect[key], got[key])
def __init__(self, archPath, pathFilter=None):
    '''
    Open the archive at ``archPath`` and record the path-prefix filter.

    Params:
        archPath: Path handed to ``pArch.PhashArchive`` and kept on
            ``self.archPath``.
        pathFilter (list): default = ``['']``
            List of valid path prefixes; matches that are not under one
            of these prefixes are not treated as matches. A falsy value
            (``None`` or an empty list) falls back to ``['']``, which
            matches every path because ``"anything".startswith('')``
            is true.
    '''
    super().__init__()

    if pathFilter:
        self.maskedPaths = pathFilter
    else:
        self.maskedPaths = ['']

    self.archPath = archPath
    self.arch = pArch.PhashArchive(archPath)

    self.log = logging.getLogger("Main.Deduper")
    self.log.info("ArchChecker Instantiated on '%s'", archPath)
def test_pArch_2(self):
    '''
    Check ``getHashInfo`` for one image inside ``testArches.zip``.

    The ``'cont'`` entry is dropped from the result before comparing it
    with the expected metadata.
    '''
    expected_info = {
        # 'dHash': 4576150637722077151,
        'hexHash': 'dcd6097eeac911efed3124374f44085b',
        'pHash': -149413575039568585,
        'type': 'image/jpeg',
        'imX': 325,
        'imY': 307,
    }

    here = os.path.dirname(os.path.realpath(__file__))
    archive = pArch.PhashArchive(os.path.join(here, 'testArches', 'testArches.zip'))

    info = archive.getHashInfo('dangerous-to-go-alone.jpg')
    info.pop('cont')

    self.assertEqual(info, expected_info)
def __init__(self, archPath, pathNegativeFilter=None, pathPositiveFilter=None, negativeKeywords=None):
    '''
    Open the archive at ``archPath`` and record the supplied filters.

    Params:
        archPath: Path handed to ``pArch.PhashArchive`` and kept on
            ``self.archPath``.
        pathNegativeFilter (list): default = ``[]``
            List of paths to exclude from matching. By default, an empty
            list, leading to all possible paths being used.
        pathPositiveFilter (list): default = ``[]``
            Stored on ``self.positiveMaskedPaths``.
        negativeKeywords (list): default = ``[]``
            Stored on ``self.negativeKeywords``.

    Any falsy filter argument (``None`` or empty) is replaced by a new
    empty list.
    '''
    super().__init__()

    self.negativeMaskedPaths = pathNegativeFilter if pathNegativeFilter else []
    self.positiveMaskedPaths = pathPositiveFilter if pathPositiveFilter else []
    self.negativeKeywords = negativeKeywords if negativeKeywords else []

    self.archPath = archPath
    self.arch = pArch.PhashArchive(archPath)

    self.log = logging.getLogger("Main.Deduper")
    self.log.info("ArchChecker Instantiated on '%s'", archPath)
def test_pArch_4(self):
    '''
    Check ``getHashInfo`` for one image inside ``testArches.7z``.
    '''
    expected_info = {
        # 'dHash': 4576150637722077151,
        'hexHash': 'dcd6097eeac911efed3124374f44085b',
        'pHash': -149413575039568585,
        'type': 'image/jpeg',
        'imX': 325,
        'imY': 307,
    }

    here = os.path.dirname(os.path.realpath(__file__))
    archive = pArch.PhashArchive(os.path.join(here, 'testArches', 'testArches.7z'))

    info = archive.getHashInfo('dangerous-to-go-alone.jpg')

    # The image contents are not what is being verified here, and they are
    # far too large to embed in this file, so drop them before comparing.
    info.pop('cont')

    self.assertEqual(info, expected_info)
def test_pArch_1(self):
    '''
    Check ``iterHashes`` over every file in ``testArches.zip``.

    The ``'cont'`` entry is deleted from each item's info mapping, the
    resulting list is printed, and it is then compared with the expected
    (name, info) pairs.
    '''
    here = os.path.dirname(os.path.realpath(__file__))
    archive = pArch.PhashArchive(os.path.join(here, 'testArches', 'testArches.zip'))

    expected = [
        ('Lolcat_this_is_mah_job.jpg', {
            # 'dHash' : -4504585791368671746,
            'hexHash': 'd9ceeb6b43c2d7d096532eabfa6cf482',
            'pHash': 27427800275512429,
            'type': 'image/jpeg',
            'imX': 493,
            'imY': 389,
        }),
        ('Lolcat_this_is_mah_job.png', {
            # 'dHash' : -4504585791368671746,
            'hexHash': '1268e704908cc39299d73d6caafc23a0',
            'pHash': 27427800275512429,
            'type': 'image/png',
            'imX': 493,
            'imY': 389,
        }),
        ('Lolcat_this_is_mah_job_small.jpg', {
            # 'dHash' : -4504585791368671746,
            'hexHash': '40d39c436e14282dcda06e8aff367307',
            'pHash': 27427800275512429,
            'type': 'image/jpeg',
            'imX': 300,
            'imY': 237,
        }),
        ('dangerous-to-go-alone.jpg', {
            # 'dHash' : 4576150637722077151,
            'hexHash': 'dcd6097eeac911efed3124374f44085b',
            'pHash': -149413575039568585,
            'type': 'image/jpeg',
            'imX': 325,
            'imY': 307,
        }),
        ('lolcat-crocs.jpg', {
            # 'dHash' : 167400391896309758,
            'hexHash': '6d0a977694630ac9d1d33a7f068e10f8',
            'pHash': -5569898607211671279,
            'type': 'image/jpeg',
            'imX': 500,
            'imY': 363,
        }),
        ('lolcat-oregon-trail.jpg', {
            # 'dHash' : -8660145558008088574,
            'hexHash': '7227289a017988b6bdcf61fd4761f6b9',
            'pHash': -4955310669995365332,
            'type': 'image/jpeg',
            'imX': 501,
            'imY': 356,
        }),
    ]

    found = list(archive.iterHashes())
    for entry in found:
        # Index 1 of each entry is the info mapping; drop its 'cont' key.
        del entry[1]['cont']

    print(found)
    self.assertEqual(found, expected)