def test_clean_up_priority_list_3_dcms(self):
    """Check which ``*.dcm`` entries a dry-run clean-up reports as removed.

    Entries are ``(path, creation time, size)`` tuples. With ``*.dcm`` as
    the only priority pattern, the expected result contains every dcm file
    of ``folder1`` and ``folder3`` and none of ``folder2``; the ``.nii``
    and ``.db`` entries are not expected to be removed either.
    """
    stats_patch = mock.patch.object(DataCleaner, '_get_file_stats')
    clock_patch = mock.patch.object(DataCleaner, '_get_current_time')
    with mock.patch.object(DataCleaner, 'scan_dir'), \
            stats_patch as mock_files, \
            clock_patch as mock_time:
        mock_files.return_value = [
            ('folder1/0001.dcm', 0, 10),
            ('folder1/0002.dcm', 0, 10),
            ('folder1/0003.dcm', 0, 10),
            ('folder1/0004.dcm', 0, 10),
            ('folder2/0001.dcm', 10, 10),
            ('folder2/0002.dcm', 10, 10),
            ('folder2/folder3/file1.nii', 10, 10),
            ('folder3/0001.dcm', 5, 10),
            ('folder3/0002.dcm', 5, 10),
            ('folder3/t.db', 10, 10),
        ]
        mock_time.return_value = 20

        # Soft and hard limit share the same value in this scenario.
        size_limit = 1.0 * 55 / 1024 / 1024
        cleaner = DataCleaner(
            folder='',
            folder_size_soft_limit=size_limit,
            folder_size_hard_limit=size_limit,
            max_data_seconds=-1,
            whitelist=[],
            priority_list=['*.dcm'])
        removed = cleaner.clean_up(dry_run=True)

        expected = [
            ('folder1/0001.dcm', 0, 10),
            ('folder1/0002.dcm', 0, 10),
            ('folder1/0003.dcm', 0, 10),
            ('folder1/0004.dcm', 0, 10),
            ('folder3/0001.dcm', 5, 10),
            ('folder3/0002.dcm', 5, 10),
        ]
        self.assertEqual(expected, removed)
def test_scalability_2(self):
    """Check that size-based cleaning of a large file list stays fast.

    Builds 100 folders x 200 dcm files (20,000 one-unit entries) and
    requires the call to finish in under 1.2 seconds of wall-clock time.
    """
    folder_names = [str(idx) for idx in range(100)]
    file_names = ['{}.dcm'.format(idx) for idx in range(200)]
    filelist = []
    for folder_name, file_name in itertools.product(folder_names,
                                                    file_names):
        filelist.append((os.path.join(folder_name, file_name), 0, 1))

    started = time.time()
    DataCleaner.clean_files_by_size_optimized(
        filelist, reduce_size=100000000, pattern='*dcm')
    finished = time.time()

    self.assertLess(finished - started, 1.2)
def test_soft_hard_limit_2(self):
    """Check that nothing is removed while usage is below the hard limit.

    The four mocked entries add up to a size of 100, which lies between
    the soft limit (40) and the hard limit (110); the dry-run clean-up is
    expected to report no removals.
    """
    with mock.patch.object(DataCleaner, 'scan_dir'), \
            mock.patch.object(DataCleaner, '_get_file_stats') as mock_files, \
            mock.patch.object(DataCleaner, '_get_current_time') as mock_time:
        mock_files.return_value = [('file1', 15, 30),
                                   ('file2', 5, 10),
                                   ('file3', 11, 30),
                                   ('file4', 13, 30)]
        mock_time.return_value = 20
        # The divisor was previously "1024 / 1028"; the sibling tests all
        # scale their limits by 1024 / 1024, so use the same conversion
        # here (presumably bytes -> MB; confirm against DataCleaner).
        dc_instance = DataCleaner(
            folder='',
            folder_size_soft_limit=1.0 * 40 / 1024 / 1024,
            folder_size_hard_limit=1.0 * 110 / 1024 / 1024,
            max_data_seconds=-1,
            whitelist=[''],
            priority_list=['file*'])
        removed = dc_instance.clean_up(dry_run=True)
        self.assertEqual([], removed)
def test_remove_empty_folder_from_base_folder_1(self):
    """Check that a base folder without sub-folders yields no removals."""
    # Create the directory before entering ``try``: if mkdtemp() itself
    # fails, the ``finally`` clause must not hit an unbound ``base_folder``
    # and mask the real error with a NameError.
    base_folder = tempfile.mkdtemp()
    try:
        removed = DataCleaner.remove_empty_folder_from_base_folder(
            base_folder)
        self.assertEqual([], removed)
    finally:
        shutil.rmtree(base_folder)
def test_do_not_clean_young_files(self):
    """Check that only ``file2`` is reported when young files are spared.

    With the mocked clock at 20, ``file2`` (created at 5) is 15 seconds
    old and ``file4`` (created at 13) is 7 seconds old, i.e. younger than
    ``min_data_seconds=8``. ``file1`` and ``file3`` are whitelisted.
    """
    stats_patch = mock.patch.object(DataCleaner, '_get_file_stats')
    clock_patch = mock.patch.object(DataCleaner, '_get_current_time')
    with mock.patch.object(DataCleaner, 'scan_dir'), \
            stats_patch as mock_files, \
            clock_patch as mock_time:
        mock_files.return_value = [
            ('file1', 15, 30),
            ('file2', 5, 10),
            ('file3', 11, 30),
            ('file4', 13, 30),
        ]
        mock_time.return_value = 20

        size_limit = 1024 * 1024
        cleaner = DataCleaner(
            folder='',
            folder_size_soft_limit=size_limit,
            folder_size_hard_limit=size_limit,
            max_data_seconds=10,
            whitelist=['file1', 'file3'],
            blacklist=['file*'],
            min_data_seconds=8)
        removed = cleaner.clean_up(dry_run=True)

        self.assertEqual([('file2', 5, 10)], removed)
def test_clean_files_by_date_2(self):
    """Check date-based cleaning: returned entries and in-place pruning.

    With the clock mocked to 10, the call is expected to return the
    entries created at 0 and 3 and to leave only the entries created at
    5 and 7 in the list that was passed in.
    """
    with mock.patch.object(DataCleaner, '_get_current_time') as mock_time:
        mock_time.return_value = 10
        filelist = [
            ('file1', 0, 0),
            ('file2', 3, 0),
            ('file3', 5, 0),
            ('file4', 7, 0),
        ]

        removed = DataCleaner.clean_files_by_date(
            filelist, 6, [], ['*file*'])

        self.assertEqual([('file1', 0, 0), ('file2', 3, 0)], removed)
        # The input list itself must have been reduced by the call.
        self.assertEqual([('file3', 5, 0), ('file4', 7, 0)], filelist)
def test__creation_time_and_size(self):
    """Check that ``(path, st_ctime, st_size)`` is built from ``os.stat``."""

    class FakeStatResult():
        """Minimal stand-in exposing the two stat fields that are read."""

        def __init__(self, ctime, size):
            self.st_ctime = ctime
            self.st_size = size

    with mock.patch('os.stat') as mock_stat:
        mock_stat.return_value = FakeStatResult('time', 'size')
        result = DataCleaner._creation_time_and_size('file1')
        self.assertEqual(('file1', 'time', 'size'), result)
def test_remove_empty_folder_from_base_folder_2(self):
    """Check that only empty sub-folders are reported, deepest first.

    Layout under the base folder: ``tmp1/tmp3`` (both empty of files) and
    ``tmp2`` holding one file. The expected result is ``[tmp3, tmp1]``.
    """
    # Create the directory before entering ``try``: if mkdtemp() itself
    # fails, the ``finally`` clause must not hit an unbound ``base_folder``
    # and mask the real error with a NameError.
    base_folder = tempfile.mkdtemp()
    try:
        tmp1 = tempfile.mkdtemp(dir=base_folder)
        tmp2 = tempfile.mkdtemp(dir=base_folder)
        tmp3 = tempfile.mkdtemp(dir=tmp1)
        # mkstemp() returns an open OS-level descriptor; close it so the
        # test does not leak it (an open handle can also block rmtree on
        # some platforms).
        fd, _ = tempfile.mkstemp(dir=tmp2)
        os.close(fd)
        removed = DataCleaner.remove_empty_folder_from_base_folder(
            base_folder)
        self.assertEqual([tmp3, tmp1], removed)
    finally:
        shutil.rmtree(base_folder)
def test_clean_up_priority_list_2(self):
    """Check priority-ordered clean-up with a whitelist in a dry run.

    The expectations encoded below are:
      1. entries matching no priority pattern are not removed
         (``t.db`` is absent from the result);
      2. removals follow the order of the priority list
         (an ``*old*.nii`` match comes first);
      3. whitelisted entries are not removed
         (``file1.nii`` and ``old_file3.nii`` are absent);
      4. removal stops before the later patterns are needed
         (no ``.png`` entry is in the result).
    """
    stats_patch = mock.patch.object(DataCleaner, '_get_file_stats')
    clock_patch = mock.patch.object(DataCleaner, '_get_current_time')
    with mock.patch.object(DataCleaner, 'scan_dir'), \
            stats_patch as mock_files, \
            clock_patch as mock_time:
        mock_files.return_value = [
            ('folder1/0001.png', 0, 10),
            ('folder1/0002.png', 0, 10),
            ('folder1/0003.png', 0, 10),
            ('folder1/0004.png', 0, 10),
            ('folder1/folder2/file1.nii', 10, 30),
            ('folder1/folder2/old_file2.nii', 10, 30),
            ('folder1/folder2/old_file3.nii', 10, 30),
            ('folder1/folder2/file4.nii', 10, 30),
            ('folder2/t.db', 10, 40),
        ]
        mock_time.return_value = 20

        size_limit = 1.0 * 115 / 1024 / 1024
        cleaner = DataCleaner(
            folder='',
            folder_size_soft_limit=size_limit,
            folder_size_hard_limit=size_limit,
            max_data_seconds=-1,
            whitelist=['*file1.nii', '*file3.nii'],
            priority_list=['*old*.nii', '*nii', '*.png', 'file*'])
        removed = cleaner.clean_up(dry_run=True)

        # TODO file should be ideally deleted only once
        # (old_file2.nii currently shows up twice in the dry-run result).
        expected = [
            ('folder1/folder2/old_file2.nii', 10, 30),
            ('folder1/folder2/file4.nii', 10, 30),
            ('folder1/folder2/old_file2.nii', 10, 30),
        ]
        self.assertEqual(expected, removed)
def test_clean_file_folder(self):
    """Check the three values returned by ``clean_file_folder``.

    For the reference path ``folder1/file1.dcm`` and pattern ``*.dcm``,
    the expected result is the other two dcm entries of ``folder1``,
    their indices in the input list (2 and 4) and their summed size (14).
    """
    filelist = [
        ('folder1/file1.dcm', 0, 1),
        ('folder2/file2.dcm', 0, 3),
        ('folder1/file3.dcm', 0, 5),
        ('folder1/file4.nii', 0, 7),
        ('folder1/file5.dcm', 0, 9),
    ]

    result = DataCleaner.clean_file_folder(
        filelist, 'folder1/file1.dcm', [], ['*.dcm'])
    removed, removed_index, removed_size = result

    expected_removed = [('folder1/file3.dcm', 0, 5),
                        ('folder1/file5.dcm', 0, 9)]
    self.assertEqual(expected_removed, removed)
    self.assertEqual([2, 4], removed_index)
    self.assertEqual(14, removed_size)
def test__sort_filestat_list_1(self):
    """Check that sorting an empty file-stat list gives an empty list."""
    result = DataCleaner._sort_filestat_list_by_time([])
    self.assertEqual([], result)
def test__sum_filestat_list_3(self):
    """Check that the sizes of two entries (1 and 2) are summed to 3."""
    entries = [("duh", 0, 1), ("brah", 0, 2)]
    total = DataCleaner._sum_filestat_list(entries)
    self.assertEqual(3, total)
def test_clean_files_by_date_1(self):
    """Check that date-based cleaning of an empty list removes nothing."""
    removed = DataCleaner.clean_files_by_date([], 0, [], [])
    self.assertEqual([], removed)
def test__sum_filestat_list_2(self):
    """Check that a single entry of size 1 sums to 1."""
    entries = [("duh", 0, 1)]
    total = DataCleaner._sum_filestat_list(entries)
    self.assertEqual(1, total)
def test_remove_files_file_nonexistent(self):
    """Check that a path which does not exist is returned as a failure."""
    missing_entry = ('mockpath/that/does/not/exist', 0, 0)
    fail_list = DataCleaner.remove_files([missing_entry])
    self.assertEqual(['mockpath/that/does/not/exist'], fail_list)
def test__remove_from_file_list_3(self):
    """Check in-place removal of indices 0 and 2 from a four-item list."""
    items = [0, 1, 2, 3]
    indices_to_drop = [0, 2]
    DataCleaner._remove_from_file_list(items, indices_to_drop)
    # The list passed in is expected to be modified directly.
    self.assertEqual([1, 3], items)
def test__check_remove_time_False(self):
    """Check that ``_check_remove_time(0, 1)`` is falsy at current time 1."""
    clock_patch = mock.patch.object(DataCleaner, '_get_current_time')
    with clock_patch as mock_time:
        mock_time.return_value = 1
        should_remove = DataCleaner._check_remove_time(0, 1)
        self.assertFalse(should_remove)
def test_clean_files_by_size_1(self):
    """Check that size-based cleaning of an empty list removes nothing."""
    removed = DataCleaner.clean_files_by_size_optimized([], 1, [], [])
    self.assertEqual([], removed)
def test_check_valid_init(self):
    """Check that constructing ``DataCleaner`` with these args succeeds.

    There is no explicit assertion: the test passes as long as the
    constructor does not raise.
    """
    init_args = (None, 0, 0, 0, -1, None, ['*.nii'], ['test.nii'])
    DataCleaner(*init_args)
def test__sort_filestat_list_2(self):
    """Check that sorting a single-entry list leaves it equal to itself."""
    single_entry = [('file1', 0, 0)]
    result = DataCleaner._sort_filestat_list_by_time(single_entry)
    self.assertEqual(single_entry, result)
def test__sort_filestat_list_3(self):
    """Check that two entries given newest-first come back reversed."""
    entries = [('file1', 1, 0), ('file2', 0, 1)]
    # Snapshot the expected order before the call, in case the sort
    # works on the list it is given.
    expected = entries[::-1]
    result = DataCleaner._sort_filestat_list_by_time(entries)
    self.assertEqual(expected, result)
def test__fnmatch_1(self):
    """Check that a name matches nothing when the pattern list is empty."""
    matched = DataCleaner._fnmatch('test.nii', [])
    self.assertFalse(matched)
def test__remove_from_file_list_2(self):
    """Check that removing index 0 from a one-item list empties it."""
    items = [0]
    indices_to_drop = [0]
    DataCleaner._remove_from_file_list(items, indices_to_drop)
    self.assertEqual([], items)
def test__fnmatch_3(self):
    """Check that ``test.nii`` does not match the sole pattern ``*.dcm``."""
    patterns = ['*.dcm']
    matched = DataCleaner._fnmatch('test.nii', patterns)
    self.assertFalse(matched)
def test__remove_from_file_list_4(self):
    """Check removal with unsorted and duplicated indices.

    Indices ``[0, 0, 4, 2, 5, 1]`` applied to ``[0..6]`` are expected to
    leave ``[3, 6]`` in the list that was passed in.
    """
    items = [0, 1, 2, 3, 4, 5, 6]
    indices_to_drop = [0, 0, 4, 2, 5, 1]
    DataCleaner._remove_from_file_list(items, indices_to_drop)
    self.assertEqual([3, 6], items)
def test_clean_files_by_size_whitelist(self):
    """Check that size-based cleaning skips the whitelisted ``file1``.

    Four entries of size 10 with a requested reduction of 15: the
    expected removals are ``file2`` and ``file3``.
    """
    filelist = [
        ('file1', 0, 10),
        ('file2', 0, 10),
        ('file3', 0, 10),
        ('file4', 0, 10),
    ]
    whitelist = ['file1']

    removed = DataCleaner.clean_files_by_size_optimized(
        filelist, 15, whitelist, 'file*')

    expected = [('file2', 0, 10), ('file3', 0, 10)]
    self.assertEqual(expected, removed)
def test__fnmatch_2(self):
    """Check that ``test.nii`` matches when ``*.nii`` is among patterns."""
    patterns = ['*.dcm', '*.nii']
    matched = DataCleaner._fnmatch('test.nii', patterns)
    self.assertTrue(matched)
def test__sum_filestat_list_1(self):
    """Check that an empty file-stat list sums to 0."""
    total = DataCleaner._sum_filestat_list([])
    self.assertEqual(0, total)
def test__check_remove_filter(self):
    """Check the filter result is falsy when both pattern lists are empty."""
    verdict = DataCleaner._check_remove_filter('test.nii', [], [])
    self.assertFalse(verdict)
def test__check_remove_filter6(self):
    """Both lists given: name misses the first list and matches the second."""
    first_patterns = ['not_test.nii']
    second_patterns = ['*.nii']
    verdict = DataCleaner._check_remove_filter(
        'test.nii', first_patterns, second_patterns)
    self.assertTrue(verdict)