def test_is_file_dup(self):
    """
    Exercise FileUtils.is_file_dup with one pair expected to be reported as
    a duplicate and several pairs expected not to be.

    The negative cases cover differently named files in the same folder,
    an integer, None, or an empty string in place of a path, and the same
    file name located in a sub-folder.
    """
    img_0001 = "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0001.jpg"
    img_0001_copy = "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0001 copy.jpg"
    img_0002 = "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0002.jpg"
    img_0718 = "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0718.jpg"
    img_0730 = "/Users/paulottley/Desktop/MomsDadsPhotos/IMG_0730.jpg"
    img_0001_nested = "/Users/paulottley/Desktop/MomsDadsPhotos/images/IMG_0001.jpg"

    # The only positive case: a file and its " copy" sibling.
    self.assertTrue(FileUtils.is_file_dup(img_0001, img_0001_copy))

    # Checked in the same order as the original sequence of assertions.
    not_duplicates = (
        (img_0001, img_0002),
        (img_0001, 2),
        (None, img_0001),
        ("", img_0001),
        (img_0718, img_0730),
        (img_0001, img_0001_nested),
    )
    for first, second in not_duplicates:
        self.assertFalse(FileUtils.is_file_dup(first, second))
def mark_duplicates(all_files):
    """
    Split ``all_files`` into entries to keep and entries flagged as duplicates.

    Two entries are compared only when they are unequal (``!=``) and report
    the same ``get_size()``. ``FileUtils.is_file_dup`` is then called with
    both full paths; when it returns a true value the second entry is
    flagged. A flagged entry takes no further part in any comparison.

    The start and end time of the run are printed to stdout.

    :param all_files: list of objects exposing ``get_size()`` and
        ``get_full_path()``. As in earlier versions, the list is pruned in
        place so that it ends up holding only the kept entries.
    :return: ``(dedupped_file_set, duplicates)`` -- a new list with the kept
        entries in their original order, and the flagged entries in the
        order they were detected.
    :rtype: tuple[list, list]
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'
    print("Mark Duplicates: "
          + datetime.datetime.now().strftime(timestamp_format))

    # Work on a snapshot with per-index flags instead of removing items from
    # the lists being iterated: removing during iteration makes the iterator
    # skip the element that follows each removal, so pairs went unchecked.
    candidates = list(all_files)
    # Sizes are read once per entry (assumes get_size() is stable in a run).
    sizes = [entry.get_size() for entry in candidates]
    flagged = [False] * len(candidates)
    duplicates = []

    for i, x in enumerate(candidates):
        if flagged[i]:
            # Already marked as a duplicate of an earlier entry.
            continue
        # Scan every entry, not just later ones: is_file_dup(a, b) is not
        # assumed to be symmetric, so an earlier entry can still turn out to
        # be the duplicate of a later one.
        for j, y in enumerate(candidates):
            if flagged[j] or sizes[j] != sizes[i]:
                continue
            if x != y and FileUtils.is_file_dup(x.get_full_path(),
                                                y.get_full_path()):
                flagged[j] = True
                duplicates.append(y)

    dedupped_file_set = [
        entry for entry, is_dup in zip(candidates, flagged) if not is_dup
    ]
    # Preserve the caller-visible side effect of the previous implementation.
    all_files[:] = dedupped_file_set

    end_time = datetime.datetime.now().strftime(timestamp_format)
    print(
        "Mark Duplicates completed at {0} with {1} files collected".format(
            end_time, len(dedupped_file_set)))

    return dedupped_file_set, duplicates