예제 #1
0
 def test_is_file_dup(self):
     """Check ``FileUtils.is_file_dup`` against a fixed table of argument pairs.

     Only the ``IMG_0001.jpg`` / ``IMG_0001 copy.jpg`` pair is expected to be
     reported as a duplicate. Every other pair -- different file names, a
     non-string second argument, ``None`` or an empty string as the first
     argument, and the same file name in a sub-directory -- is expected to
     be rejected.

     NOTE(review): the paths are absolute and machine-specific; presumably
     the files have to exist at these locations for the test to pass --
     confirm against ``FileUtils.is_file_dup``.
     """
     original = "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0001.jpg"

     # The single positive case: the original next to its "copy".
     self.assertTrue(
         FileUtils.is_file_dup(
             original,
             "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0001 copy.jpg"))

     # Negative cases, evaluated in order; the first failure aborts the test.
     non_duplicate_pairs = (
         (original,
          "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0002.jpg"),
         (original, 2),
         (None, original),
         ("", original),
         ("/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0718.jpg",
          "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0730.jpg"),
         (original,
          "/Users/paulottley/Desktop/MomsDadsPhotos/images/IMG_0001.jpg"),
     )
     for first, second in non_duplicate_pairs:
         self.assertFalse(FileUtils.is_file_dup(first, second))
예제 #2
0
    def mark_duplicates(all_files):
        """
        Split ``all_files`` into files to keep and files flagged as duplicates.

        Files are visited in input order. A file is flagged as a duplicate
        when an already-kept file reports the same ``get_size()`` and
        ``FileUtils.is_file_dup`` returns true for the two full paths (kept
        file first, candidate second). The first file of every duplicate
        group is therefore always the one that is kept.

        ``all_files`` is also updated in place so that it ends up holding
        only the kept files; earlier versions of this function removed the
        duplicates from the caller's list as well, and that side effect is
        preserved.

        The start and end times are printed to stdout.

        :param all_files: list of objects exposing ``get_size()`` and
            ``get_full_path()``.
        :return: ``(dedupped_file_set, duplicates)`` -- the kept files and
            the flagged duplicates, both in input order.
        :rtype: tuple[list, list]
        """
        # Print start time
        start_time = datetime.datetime.fromtimestamp(time.time()).strftime(
            '%Y-%m-%d %H:%M:%S')
        print("Mark Duplicates: " + start_time)

        duplicates = []
        dedupped_file_set = []

        # Kept files bucketed by size: only same-size files can be
        # duplicates, so each candidate is compared against its own bucket
        # and get_size() is called once per file instead of once per pair.
        kept_by_size = {}

        # The input list is not modified while it is being iterated;
        # removing elements mid-iteration makes the iterator skip entries
        # and made the surviving file of a group depend on list position.
        for candidate in all_files:
            bucket = kept_by_size.setdefault(candidate.get_size(), [])
            candidate_path = candidate.get_full_path()

            # `candidate != kept` keeps an entry from being flagged as a
            # duplicate of an equal entry (e.g. the same object listed
            # twice), matching the original comparison.
            is_duplicate = any(
                candidate != kept
                and FileUtils.is_file_dup(kept.get_full_path(),
                                          candidate_path)
                for kept in bucket)

            if is_duplicate:
                duplicates.append(candidate)
            else:
                bucket.append(candidate)
                dedupped_file_set.append(candidate)

        # Preserve the historical in-place update of the caller's list,
        # done in one step after iteration has finished.
        all_files[:] = dedupped_file_set

        # Print end time
        end_time = datetime.datetime.fromtimestamp(time.time()).strftime(
            '%Y-%m-%d %H:%M:%S')
        print(
            "Mark Duplicates completed at {0} with {1} files collected".format(
                end_time, len(dedupped_file_set)))

        return dedupped_file_set, duplicates