def test_validate_arches(self):
        """Compare the hash listing of every reference archive with ``expect``.

        For each name in ``arches`` the archive stored under ``test_ptree_base``
        (next to this test file) is opened, its entries are collected through
        ``iterHashes()`` and the ``'cont'`` entry is removed from each item's
        info dict. The collected listings, keyed by archive name, must then
        equal ``expect`` both in their key set and in every per-archive value.
        """
        # The directory of this file is the same for every archive; resolve once.
        cwd = os.path.dirname(os.path.realpath(__file__))

        got = {}
        for arch_name in arches:
            archPath = os.path.join(cwd, 'test_ptree_base', arch_name)

            arch = pArch.PhashArchive(archPath)

            archHashes = list(arch.iterHashes())
            for item in archHashes:
                # 'cont' is not part of the expected data, so drop it before comparing.
                del item[1]['cont']
            got[arch_name] = archHashes

        expect_keys = sorted(expect.keys())
        got_keys = sorted(got.keys())

        self.assertEqual(expect_keys, got_keys)

        for key in expect_keys:
            # Dump both sides only on a real mismatch. This previously compared
            # against the key list, which made the dump fire for every archive.
            if got[key] != expect[key]:
                print("Key:", key)
                pprint.pprint(got[key])
                pprint.pprint(expect[key])
            self.assertEqual(expect[key], got[key])
	def __init__(self, archPath, pathFilter=None):
		'''
		Open the archive at ``archPath`` and remember the path filter.

		Params:
			archPath (str): location of the archive; it is stored on the instance
				and handed to ``pArch.PhashArchive``.
			pathFilter (list): default =``['']``
				List of valid path prefixes. Matches that are not under any of
				these prefixes are not treated as matches.
				A missing or empty value becomes [''], which accepts every path
				since "anything".startswith('') is true.
		'''

		super().__init__()

		# Fall back to the match-everything prefix when no usable filter was given.
		if not pathFilter:
			pathFilter = ['']
		self.maskedPaths = pathFilter

		self.archPath = archPath
		self.arch = pArch.PhashArchive(archPath)

		logger = logging.getLogger("Main.Deduper")
		self.log = logger
		logger.info("ArchChecker Instantiated on '%s'", archPath)
Esempio n. 3
0
    def test_pArch_2(self):
        """Check ``getHashInfo`` for a single image inside the zip test archive."""
        here = os.path.dirname(os.path.realpath(__file__))
        zip_path = os.path.join(here, 'testArches', 'testArches.zip')
        archive = pArch.PhashArchive(zip_path)

        info = archive.getHashInfo('dangerous-to-go-alone.jpg')
        # 'cont' is not among the expected fields, so remove it before comparing.
        info.pop('cont')

        wanted = dict(
            # dHash=4576150637722077151,
            hexHash='dcd6097eeac911efed3124374f44085b',
            imX=325,
            pHash=-149413575039568585,
            type='image/jpeg',
            imY=307,
        )

        self.assertEqual(info, wanted)
Esempio n. 4
0
    def __init__(self,
                 archPath,
                 pathNegativeFilter=None,
                 pathPositiveFilter=None,
                 negativeKeywords=None):
        '''
        Open the archive at ``archPath`` and store the filter settings.

        Params:
            archPath (str): location of the archive; kept on the instance and
                passed to ``pArch.PhashArchive``.
            pathNegativeFilter (list): default =``[]``
                List of paths to exclude from matching.
                By default an empty list, leading to all possible paths being used.
            pathPositiveFilter (list): default =``[]``
                Stored as ``positiveMaskedPaths``.
            negativeKeywords (list): default =``[]``
                Stored as ``negativeKeywords``.
        '''

        super().__init__()

        # Any falsy filter argument (None, empty) is replaced by a fresh empty list.
        self.negativeMaskedPaths = pathNegativeFilter if pathNegativeFilter else []
        self.positiveMaskedPaths = pathPositiveFilter if pathPositiveFilter else []
        self.negativeKeywords = negativeKeywords if negativeKeywords else []

        self.archPath = archPath
        self.arch = pArch.PhashArchive(archPath)

        logger = logging.getLogger("Main.Deduper")
        self.log = logger
        logger.info("ArchChecker Instantiated on '%s'", archPath)
Esempio n. 5
0
    def test_pArch_4(self):
        """Check ``getHashInfo`` for a single image inside the 7z test archive."""
        here = os.path.dirname(os.path.realpath(__file__))
        sevenzip_path = os.path.join(here, 'testArches', 'testArches.7z')
        archive = pArch.PhashArchive(sevenzip_path)

        info = archive.getHashInfo('dangerous-to-go-alone.jpg')

        # The image contents are not being verified here, and they are far too
        # large to embed in this file, so discard them before comparing.
        info.pop('cont')

        wanted = dict(
            # dHash=4576150637722077151,
            hexHash='dcd6097eeac911efed3124374f44085b',
            imX=325,
            pHash=-149413575039568585,
            type='image/jpeg',
            imY=307,
        )

        self.assertEqual(info, wanted)
Esempio n. 6
0
    def test_pArch_1(self):
        """Check the complete ``iterHashes`` listing of the zip test archive.

        Every entry yielded by the archive has its ``'cont'`` field removed and
        the remaining (name, info) pairs must equal the reference list below,
        in the same order.
        """
        here = os.path.dirname(os.path.realpath(__file__))
        zip_path = os.path.join(here, 'testArches', 'testArches.zip')
        archive = pArch.PhashArchive(zip_path)

        # Reference data; the commented-out dHash values are kept for reference only.
        expected = [
            ('Lolcat_this_is_mah_job.jpg', {
                'hexHash': 'd9ceeb6b43c2d7d096532eabfa6cf482',
                'type': 'image/jpeg',
                'imY': 389,
                'pHash': 27427800275512429,
                # 'dHash'   : -4504585791368671746,
                'imX': 493,
            }),
            ('Lolcat_this_is_mah_job.png', {
                'hexHash': '1268e704908cc39299d73d6caafc23a0',
                'type': 'image/png',
                'imY': 389,
                'pHash': 27427800275512429,
                # 'dHash'   : -4504585791368671746,
                'imX': 493,
            }),
            ('Lolcat_this_is_mah_job_small.jpg', {
                'hexHash': '40d39c436e14282dcda06e8aff367307',
                'type': 'image/jpeg',
                'imY': 237,
                'pHash': 27427800275512429,
                # 'dHash'   : -4504585791368671746,
                'imX': 300,
            }),
            ('dangerous-to-go-alone.jpg', {
                'hexHash': 'dcd6097eeac911efed3124374f44085b',
                'type': 'image/jpeg',
                'imY': 307,
                'pHash': -149413575039568585,
                # 'dHash'   : 4576150637722077151,
                'imX': 325,
            }),
            ('lolcat-crocs.jpg', {
                'hexHash': '6d0a977694630ac9d1d33a7f068e10f8',
                'type': 'image/jpeg',
                'imY': 363,
                'pHash': -5569898607211671279,
                # 'dHash'   : 167400391896309758,
                'imX': 500,
            }),
            ('lolcat-oregon-trail.jpg', {
                'hexHash': '7227289a017988b6bdcf61fd4761f6b9',
                'type': 'image/jpeg',
                'imY': 356,
                'pHash': -4955310669995365332,
                # 'dHash'   : -8660145558008088574,
                'imX': 501,
            }),
        ]

        listing = list(archive.iterHashes())
        for entry in listing:
            # The second element of each entry is the info dict; drop its 'cont' field.
            entry[1].pop('cont')
        print(listing)
        self.assertEqual(listing, expected)