コード例 #1
0
    def setUpClass(cls):
        """Build the shared image fixtures and write two hash -> path JSON maps.

        Seeds ``np.random`` with 0, collects the paths returned by
        ``createRandomTestImages`` for the base folder, and dumps their
        MD5 -> path map to ``filesmap1.json``. The paths returned for four
        sub-folders are then appended to the same list, and the map of the
        full list is dumped to ``complexFilesMap.json``.
        """
        np.random.seed(0)
        base = cls._testBasePath

        # "Simple" fixture: only the paths created for the base folder.
        cls._imagePaths = createRandomTestImages(base, 10, 10, 0, 5)
        simpleMap = {}
        for path in cls._imagePaths:
            digest = dff.calculateMD5Hash(path)
            simpleMap[digest] = path
        cls._simpleFilesMap1 = simpleMap
        cls._simpleFilesMap1File = os.path.join(base, 'filesmap1.json')
        with open(cls._simpleFilesMap1File, 'w') as fp:
            json.dump(simpleMap, fp)

        # "Complex" fixture: same list, grown with the sub-folder paths.
        # The folder order is kept fixed because the RNG was seeded above.
        subfolders = (
            ('folder1',),
            ('folder2',),
            ('folder1', 'folder3'),
            ('folder2', 'folder4'),
        )
        for parts in subfolders:
            folder = os.path.join(base, *parts)
            cls._imagePaths.extend(
                createRandomTestImages(folder, 10, 10, 0, 5))

        complexMap = {}
        for path in cls._imagePaths:
            digest = dff.calculateMD5Hash(path)
            complexMap[digest] = path
        cls._complexFilesMap1 = complexMap
        cls._complexFilesMap1File = os.path.join(base, 'complexFilesMap.json')
        with open(cls._complexFilesMap1File, 'w') as fp:
            json.dump(complexMap, fp)
コード例 #2
0
 def test_calculateMD5HashWorks(self):
     """Check ``dff.calculateMD5Hash`` against the expected digests of two test images."""
     expectedDigests = (
         (0, "5b4f89add29ae4ad253ce90b24ca132c"),  # small image
         (1, "7c4143dee5870f2dc5aebc7be1a42e32"),  # big image
     )
     for index, digest in expectedDigests:
         self.assertEqual(dff.calculateMD5Hash(self._testImgs[index]),
                          digest)
コード例 #3
0
    def test_checkForDuplicatesWorksWithNoDuplicateFiles(self):
        """Check that scanning two extra folders only adds new originals.

        Starts from a known-files map built from ``self._imagePaths``, runs
        ``dff.checkForDuplicates`` over ``folder1/folder3`` and
        ``folder2/folder4`` on a deep copy of that map, and asserts that the
        result holds exactly the known originals plus ``self._imagePaths3``
        and ``self._imagePaths4``, with no entry carrying duplicates.
        """
        knownFilesMap = {}
        for path in self._imagePaths:
            digest = dff.calculateMD5Hash(path)
            knownFilesMap[digest] = dff.File(path)

        scanFolders = [
            os.path.join(self._testBasePath, 'folder1', 'folder3'),
            os.path.join(self._testBasePath, 'folder2', 'folder4'),
        ]
        inputFilesList = dff.buildInputFilesList(scanFolders, knownFilesMap)

        # Work on a copy so the known map stays available for comparison.
        filesMap = copy.deepcopy(knownFilesMap)
        dff.checkForDuplicates(inputFilesList, filesMap)

        expectedCount = (len(self._imagePaths3) + len(self._imagePaths4) +
                         len(knownFilesMap))
        self.assertEqual(expectedCount, len(filesMap))

        knownOriginals = [entry._original for entry in knownFilesMap.values()]
        expectedOriginals = (self._imagePaths3 + self._imagePaths4 +
                             knownOriginals)
        foundOriginals = [entry._original for entry in filesMap.values()]
        self.assertEqual(sorted(expectedOriginals), sorted(foundOriginals))

        for entry in filesMap.values():
            original = entry._original
            isExpected = (original in knownOriginals
                          or original in self._imagePaths3
                          or original in self._imagePaths4)
            self.assertTrue(isExpected)
            self.assertFalse(entry._duplicates)
コード例 #4
0
 def test_checkForDuplicatesWorksWithDuplicateFilesInDifferentFolders(self):
     """Check originals and duplicates found when scanning the whole base folder.

     Runs ``dff.checkForDuplicates`` on an empty map over the input list
     built from ``self._testBasePath`` and asserts: 40 map entries, keys
     equal to the (unique) hashes of ``self._originals``, originals equal
     to ``self._originals``, and 45 duplicates in total, none of which is
     listed in ``self._originals``.
     """
     filesMap = {}
     scanList = dff.buildInputFilesList([self._testBasePath], {})
     dff.checkForDuplicates(scanList, filesMap)
     self.assertEqual(len(filesMap), 40)

     expectedHashes = [dff.calculateMD5Hash(path) for path in self._originals]
     # The originals must all hash differently for the key comparison to hold.
     self.assertEqual(len(set(expectedHashes)), len(expectedHashes))
     self.assertEqual(sorted(expectedHashes), sorted(filesMap))

     foundOriginals = [entry._original for entry in filesMap.values()]
     self.assertEqual(sorted(foundOriginals), sorted(self._originals))

     # Flatten the duplicates of every entry to check the total count.
     allDuplicates = []
     for entry in filesMap.values():
         allDuplicates.extend(entry._duplicates)
     self.assertEqual(len(allDuplicates), 45)
     for duplicate in allDuplicates:
         self.assertNotIn(duplicate, self._originals)
コード例 #5
0
    def setUpClass(cls):
        """Build per-folder image fixtures and the known-files map.

        Seeds ``np.random`` with 0, stores the result of
        ``createRandomTestImages`` for the base folder and four sub-folders
        in ``_imagePaths`` and ``_imagePaths1`` .. ``_imagePaths4``, then maps
        the MD5 hash of each base-folder path to a ``dff.File`` for it.
        """
        np.random.seed(0)
        base = cls._testBasePath
        folder1 = os.path.join(base, 'folder1')
        folder2 = os.path.join(base, 'folder2')
        folder3 = os.path.join(folder1, 'folder3')
        folder4 = os.path.join(folder2, 'folder4')

        # Call order is kept fixed because the RNG was seeded above.
        # NOTE(review): the meaning of the four numeric arguments is defined
        # by createRandomTestImages, which is not visible here.
        cls._imagePaths = createRandomTestImages(base, 10, 10, 0, 5)
        cls._imagePaths1 = createRandomTestImages(folder1, 10, 10, 5, 5)
        cls._imagePaths2 = createRandomTestImages(folder2, 10, 10, 10, 5)
        cls._imagePaths3 = createRandomTestImages(folder3, 10, 10, 15, 5)
        cls._imagePaths4 = createRandomTestImages(folder4, 10, 10, 20, 10)

        knownFiles = {}
        for path in cls._imagePaths:
            digest = dff.calculateMD5Hash(path)
            knownFiles[digest] = dff.File(path)
        cls._knownFilesMap = knownFiles
コード例 #6
0
 def test_calculateMD5HashWorksOnEmptyFiles(self):
     """Check that hashing ``self._emptyTestImg`` yields the MD5 of empty input."""
     # d41d8cd98f00b204e9800998ecf8427e is the MD5 digest of the empty string ''.
     emptyDigest = "d41d8cd98f00b204e9800998ecf8427e"
     self.assertEqual(dff.calculateMD5Hash(self._emptyTestImg), emptyDigest)