def test_removal(self):
    """A completed run's removal plan contains only the run itself."""
    completed_run = ContainerRun(id=42, state=ContainerRun.COMPLETE)
    plan = strip_removal_plan(completed_run.build_removal_plan())
    self.assertEqual({'ContainerRuns': {completed_run}}, plan)
def test_change_not_on_rerun(self):
    """A completed rerun whose MD5 matches the original has not changed."""
    matching_md5 = '11111111111111111111111111111111'
    first_run = ContainerRun(md5=matching_md5)
    rerun = ContainerRun(md5=matching_md5,
                         original_run=first_run,
                         state=ContainerRun.COMPLETE)
    self.assertEqual(False, rerun.has_changed)
def test_change_on_fail(self):
    """Only report changes on successfully completed runs."""
    matching_md5 = '11111111111111111111111111111111'
    first_run = ContainerRun(md5=matching_md5)
    failed_rerun = ContainerRun(md5=matching_md5,
                                original_run=first_run,
                                state=ContainerRun.FAILED)
    self.assertIsNone(failed_rerun.has_changed)
def build_run(self):
    """Assemble an in-memory ContainerRun with one input and one output argument."""
    new_run = ContainerRun()
    new_run.sandbox_path = '/tmp/box23'
    app = ContainerApp()
    app.container = Container()
    app.container.file = Namespace(path='/tmp/foo.simg')
    new_run.app = app
    app.arguments.create(type=ContainerArgument.INPUT, name='in_csv')
    app.arguments.create(type=ContainerArgument.OUTPUT, name='out_csv')
    return new_run
def test_removal_skips_inputs(self):
    """Datasets fed into a run as inputs are not part of its removal plan."""
    completed_run = ContainerRun(id=42, state=ContainerRun.COMPLETE)
    input_dataset = Dataset(id=43)
    input_argument = ContainerArgument(type=ContainerArgument.INPUT)
    completed_run.datasets.create(dataset=input_dataset,
                                  argument=input_argument)
    plan = strip_removal_plan(completed_run.build_removal_plan())
    self.assertEqual({'ContainerRuns': {completed_run}}, plan)
def test_rerun_names(self):
    """Check the name generated for a rerun across several original names."""
    cases = [('example', 'example (rerun)'),
             ('', '(rerun)'),
             ('example ', 'example (rerun)'),
             ('example (rerun)', 'example (rerun)'),
             ('example (rerun) ', 'example (rerun)'),
             ('example (rerun) weird', 'example (rerun) weird (rerun)')]
    run = ContainerRun()
    for original_name, expected_rerun_name in cases:
        run.name = original_name
        self.assertEqual(expected_rerun_name, run.get_rerun_name())
def test_slurm_command_priority(self):
    """The run's priority selects the matching slurm partition via -p."""
    run = ContainerRun(pk=99)
    run.user = User(username='******')
    run.app = ContainerApp()
    run.app.container = Container()
    run.app.container.family = ContainerFamily(name='my container')
    run.priority = 2
    run.sandbox_path = 'run23'
    queue_config = (('low', 'kive-low'),
                    ('medium', 'kive-medium'),
                    ('high', 'kive-high'))

    command = run.build_slurm_command(queue_config)

    self.assertListEqual(
        ['sbatch',
         '-J', 'r99 my container',
         '--parsable',
         '--output', '/tmp/kive_media/run23/logs/job%J_node%N_stdout.txt',
         '--error', '/tmp/kive_media/run23/logs/job%J_node%N_stderr.txt',
         '-c', '1',
         '--mem', '6000',
         '-p', 'kive-high',
         EXPECTED_MANAGE_PATH, 'runcontainer', '99'],
        command)
def test_slurm_command_custom_memory(self):
    """App-level thread and memory settings flow into the sbatch arguments."""
    run = ContainerRun(pk=99)
    run.user = User(username='******')
    run.app = ContainerApp(threads=3, memory=100)
    run.app.container = Container()
    run.app.container.family = ContainerFamily(name='my container')
    run.sandbox_path = 'run23'

    command = run.build_slurm_command()

    self.assertListEqual(
        ['sbatch',
         '-J', 'r99 my container',
         '--parsable',
         '--output', '/tmp/kive_media/run23/logs/job%J_node%N_stdout.txt',
         '--error', '/tmp/kive_media/run23/logs/job%J_node%N_stderr.txt',
         '-c', '3',
         '--mem', '100',
         EXPECTED_MANAGE_PATH, 'runcontainer', '99'],
        command)
def test_build_dataset_name(self):
    """The run id is inserted before the extension encoded in the argument name."""
    run = ContainerRun(id=42)
    handler = runcontainer.Command()
    cases = [('example_csv', 'example_42.csv'),
             ('example_tar_gz', 'example_42.tar.gz'),
             ('csv', '42.csv'),
             ('_csv', '_42.csv'),
             ('_', '__42'),
             ('no_extension', 'no_extension_42')]
    for argument_name, expected_name in cases:
        self.assertEqual(expected_name,
                         handler.build_dataset_name(run, argument_name))
def test_slurm_command_custom_memory(self):
    """sbatch should request the app's configured threads and memory."""
    run = ContainerRun(pk=99)
    run.user = User(username='******')
    run.app = ContainerApp(threads=3, memory=100)
    run.app.container = Container()
    run.app.container.family = ContainerFamily(name='my container')
    run.sandbox_path = 'run23'
    header = ['sbatch', '-J', 'r99 my container', '--parsable']
    log_args = ['--output',
                '/tmp/kive_media/run23/logs/job%J_node%N_stdout.txt',
                '--error',
                '/tmp/kive_media/run23/logs/job%J_node%N_stderr.txt']
    resources = ['-c', '3', '--mem', '100']
    tail = [EXPECTED_MANAGE_PATH, 'runcontainer', '99']
    self.assertListEqual(header + log_args + resources + tail,
                         run.build_slurm_command())
def test_slurm_command_priority(self):
    """Priority 2 maps to the third configured queue, passed via -p."""
    run = ContainerRun(pk=99)
    run.user = User(username='******')
    run.app = ContainerApp()
    run.app.container = Container()
    run.app.container.family = ContainerFamily(name='my container')
    run.priority = 2
    run.sandbox_path = 'run23'
    slurm_queues = (('low', 'kive-low'),
                    ('medium', 'kive-medium'),
                    ('high', 'kive-high'))
    header = ['sbatch', '-J', 'r99 my container', '--parsable']
    log_args = ['--output',
                '/tmp/kive_media/run23/logs/job%J_node%N_stdout.txt',
                '--error',
                '/tmp/kive_media/run23/logs/job%J_node%N_stderr.txt']
    resources = ['-c', '1', '--mem', '6000', '-p', 'kive-high']
    tail = [EXPECTED_MANAGE_PATH, 'runcontainer', '99']
    self.assertListEqual(header + log_args + resources + tail,
                         run.build_slurm_command(slurm_queues))
def retrieve(self, request, *args, **kwargs):
    """Refresh the requested run's slurm state before returning it."""
    ContainerRun.check_slurm_state(kwargs.get('pk'))
    return super(ContainerRunViewSet, self).retrieve(request,
                                                     *args,
                                                     **kwargs)
def list(self, request, *args, **kwargs):
    """Refresh slurm state for all runs, then delegate to the default listing."""
    ContainerRun.check_slurm_state()
    response = super(ContainerRunViewSet, self).list(request, *args, **kwargs)
    return response
def get(self, request, *args, **kwargs):
    """Bring the run's slurm state up to date before rendering the page."""
    # noinspection PyTypeChecker
    ContainerRun.check_slurm_state(kwargs.get('pk'))
    return super(ContainerRunUpdate, self).get(request, *args, **kwargs)
def test_change_on_new_run(self):
    """A completed run with no original_run set reports has_changed as None."""
    fresh_run = ContainerRun(md5='11111111111111111111111111111111',
                             state=ContainerRun.COMPLETE)
    self.assertIsNone(fresh_run.has_changed)
def filter_granted(self, queryset):
    """Logs don't have permissions, so filter by parent runs."""
    visible_runs = ContainerRun.filter_by_user(self.request.user)
    return queryset.filter(run_id__in=visible_runs)
def test_remove_running(self):
    """An active run refuses to build a removal plan."""
    active_run = ContainerRun(id=42, state=ContainerRun.RUNNING)
    expected_message = r'ContainerRun id 42 is still active.'
    with self.assertRaisesRegex(ValueError, expected_message):
        active_run.build_removal_plan()
def test_remove_running(self):
    """An active run refuses to build a removal plan.

    Uses assertRaisesRegex: assertRaisesRegexp was a deprecated alias
    and is removed in Python 3.12, matching the sibling test that
    already uses the modern name.
    """
    run = ContainerRun(id=42, state=ContainerRun.RUNNING)
    with self.assertRaisesRegex(ValueError,
                                r'ContainerRun id 42 is still active.'):
        run.build_removal_plan()
def purge(self, start, stop, dataset_aging, log_aging, sandbox_aging,
          batch_size):
    """Delete old sandboxes, logs, and datasets to reclaim storage.

    Does nothing if total storage is at or below ``start``; otherwise
    purges the "oldest" entries (age weighted per category by the
    ``*_aging`` multipliers) in batches of ``batch_size`` until usage
    drops to ``stop`` or nothing purgeable remains.

    :param start: storage threshold (bytes) that triggers a purge
    :param stop: storage target (bytes) at which purging stops
    :param dataset_aging: age multiplier for datasets
    :param log_aging: age multiplier for container logs
    :param sandbox_aging: age multiplier for run sandboxes
    :param batch_size: number of entries fetched per purge pass
    """
    logger.debug('Starting purge.')
    # Recalculate on-disk sizes before deciding whether to purge.
    container_total = self.set_file_sizes(Container,
                                          'file',
                                          'file_size',
                                          'created')
    sandbox_total = self.set_file_sizes(ContainerRun,
                                        'sandbox_path',
                                        'sandbox_size',
                                        'end_time')
    log_total = self.set_file_sizes(ContainerLog,
                                    'long_text',
                                    'log_size',
                                    'run__end_time')
    dataset_total = self.set_file_sizes(Dataset,
                                        'dataset_file',
                                        'dataset_size',
                                        'date_created')
    total_storage = remaining_storage = (container_total + sandbox_total +
                                         log_total + dataset_total)
    if total_storage <= start:
        storage_text = self.summarize_storage(container_total,
                                              dataset_total,
                                              sandbox_total,
                                              log_total)
        logger.debug(u"No purge needed for %s: %s.",
                     filesizeformat(total_storage),
                     storage_text)
        return
    # Each queryset yields (entry_type, id, age) rows so they can be
    # combined in one union and sorted by weighted age. All three must
    # share the same output_field (DurationField) for the union's
    # ORDER BY on age to compare consistently; the dataset queryset
    # previously used FloatField, which mismatched the other two.
    sandbox_ages = ContainerRun.find_unneeded().annotate(
        entry_type=Value('r', models.CharField()),
        age=ExpressionWrapper(
            sandbox_aging * (Now() - F('end_time')),
            output_field=DurationField())).values_list(
        'entry_type', 'id', 'age').order_by()
    log_ages = ContainerLog.find_unneeded().annotate(
        entry_type=Value('l', models.CharField()),
        age=ExpressionWrapper(
            log_aging * (Now() - F('run__end_time')),
            output_field=DurationField())).values_list(
        'entry_type', 'id', 'age').order_by()
    dataset_ages = Dataset.find_unneeded().annotate(
        entry_type=Value('d', models.CharField()),
        age=ExpressionWrapper(
            dataset_aging * (Now() - F('date_created')),
            output_field=DurationField())).values_list(
        'entry_type', 'id', 'age').order_by()
    purge_counts = Counter()
    max_purge_dates = {}
    min_purge_dates = {}
    purge_entries = sandbox_ages.union(log_ages,
                                       dataset_ages,
                                       all=True).order_by('-age')
    while remaining_storage > stop:
        entry_count = 0
        for entry_type, entry_id, age in purge_entries[:batch_size]:
            entry_count += 1
            if entry_type == 'r':
                run = ContainerRun.objects.get(id=entry_id)
                entry_size = run.sandbox_size
                entry_date = run.end_time
                logger.debug("Purged container run %d containing %s.",
                             run.pk,
                             filesizeformat(entry_size))
                try:
                    run.delete_sandbox()
                except OSError:
                    # Best effort: record the failure but still clear the
                    # path so the run is not retried forever.
                    logger.error(
                        u"Failed to purge container run %d at %r.",
                        run.id,
                        run.sandbox_path,
                        exc_info=True)
                run.sandbox_path = ''
                run.save()
            elif entry_type == 'l':
                log = ContainerLog.objects.get(id=entry_id)
                entry_size = log.log_size
                entry_date = log.run.end_time
                logger.debug("Purged container log %d containing %s.",
                             log.id,
                             filesizeformat(entry_size))
                log.long_text.delete()
            else:
                assert entry_type == 'd'
                dataset = Dataset.objects.get(id=entry_id)
                entry_size = dataset.dataset_size
                dataset_total -= dataset.dataset_size
                entry_date = dataset.date_created
                logger.debug("Purged dataset %d containing %s.",
                             dataset.pk,
                             filesizeformat(entry_size))
                dataset.dataset_file.delete()
            purge_counts[entry_type] += 1
            purge_counts[entry_type + ' bytes'] += entry_size
            # PyCharm false positives...
            # noinspection PyUnresolvedReferences
            min_purge_dates[entry_type] = min(
                entry_date,
                min_purge_dates.get(entry_type, entry_date))
            # noinspection PyUnresolvedReferences
            max_purge_dates[entry_type] = max(
                entry_date,
                max_purge_dates.get(entry_type, entry_date))
            remaining_storage -= entry_size
            if remaining_storage <= stop:
                break
        if entry_count == 0:
            # Nothing purgeable left, even though we're above the target.
            break
    # Summarize what was purged, one log line per category.
    for entry_type, entry_name in (('r', 'container run'),
                                   ('l', 'container log'),
                                   ('d', 'dataset')):
        purged_count = purge_counts[entry_type]
        if not purged_count:
            continue
        min_purge_date = min_purge_dates[entry_type]
        max_purge_date = max_purge_dates[entry_type]
        collective = entry_name + pluralize(purged_count)
        bytes_removed = purge_counts[entry_type + ' bytes']
        start_text = naturaltime(min_purge_date)
        end_text = naturaltime(max_purge_date)
        date_range = (start_text
                      if start_text == end_text
                      else start_text + ' to ' + end_text)
        logger.info("Purged %d %s containing %s from %s.",
                    purged_count,
                    collective,
                    filesizeformat(bytes_removed),
                    date_range)
    if remaining_storage > stop:
        # NOTE(review): the earlier summarize_storage call passes four
        # totals; this one passes two and the sandbox/log totals are not
        # decremented during the loop — confirm against summarize_storage's
        # signature and intent.
        storage_text = self.summarize_storage(container_total,
                                              dataset_total)
        logger.error('Cannot reduce storage to %s: %s.',
                     filesizeformat(stop),
                     storage_text)
def purge(self, start, stop, dataset_aging, log_aging, sandbox_aging,
          batch_size):
    """Delete old sandboxes, logs, and datasets to reclaim storage.

    Does nothing if total storage is at or below ``start``; otherwise
    purges the entries with the largest weighted age (per-category
    ``*_aging`` multipliers applied to time since end/creation) in
    batches of ``batch_size`` until usage drops to ``stop`` or nothing
    purgeable remains.

    :param start: storage threshold (bytes) that triggers a purge
    :param stop: storage target (bytes) at which purging stops
    :param dataset_aging: age multiplier for datasets
    :param log_aging: age multiplier for container logs
    :param sandbox_aging: age multiplier for run sandboxes
    :param batch_size: number of entries fetched per purge pass
    """
    logger.debug('Starting purge.')
    # Recalculate on-disk sizes before deciding whether to purge.
    container_total = self.set_file_sizes(Container,
                                          'file',
                                          'file_size',
                                          'created')
    sandbox_total = self.set_file_sizes(ContainerRun,
                                        'sandbox_path',
                                        'sandbox_size',
                                        'end_time')
    log_total = self.set_file_sizes(ContainerLog,
                                    'long_text',
                                    'log_size',
                                    'run__end_time')
    dataset_total = self.set_file_sizes(Dataset,
                                        'dataset_file',
                                        'dataset_size',
                                        'date_created')
    total_storage = remaining_storage = (
        container_total + sandbox_total + log_total + dataset_total)
    if total_storage <= start:
        storage_text = self.summarize_storage(container_total,
                                              dataset_total,
                                              sandbox_total,
                                              log_total)
        logger.debug(u"No purge needed for %s: %s.",
                     filesizeformat(total_storage),
                     storage_text)
        return
    # Each queryset yields (entry_type, id, age) rows so all three can be
    # combined in one union and sorted by weighted age.
    # NOTE(review): no output_field is declared for the age expressions;
    # confirm the database resolves all three to the same type, since the
    # union below orders by this column.
    sandbox_ages = ContainerRun.find_unneeded().annotate(
        entry_type=Value('r', models.CharField()),
        age=sandbox_aging * (Now() - F('end_time'))).values_list(
        'entry_type', 'id', 'age').order_by()
    log_ages = ContainerLog.find_unneeded().annotate(
        entry_type=Value('l', models.CharField()),
        age=log_aging * (Now() - F('run__end_time'))).values_list(
        'entry_type', 'id', 'age').order_by()
    dataset_ages = Dataset.find_unneeded().annotate(
        entry_type=Value('d', models.CharField()),
        age=dataset_aging * (Now() - F('date_created'))).values_list(
        'entry_type', 'id', 'age').order_by()
    purge_counts = Counter()
    max_purge_dates = {}
    min_purge_dates = {}
    # Oldest (largest weighted age) first.
    purge_entries = sandbox_ages.union(log_ages,
                                       dataset_ages,
                                       all=True).order_by('-age')
    while remaining_storage > stop:
        entry_count = 0
        for entry_type, entry_id, age in purge_entries[:batch_size]:
            entry_count += 1
            if entry_type == 'r':
                run = ContainerRun.objects.get(id=entry_id)
                entry_size = run.sandbox_size
                entry_date = run.end_time
                logger.debug("Purged container run %d containing %s.",
                             run.pk,
                             filesizeformat(entry_size))
                try:
                    run.delete_sandbox()
                except OSError:
                    # Best effort: log the failure, then clear the path
                    # below so the run is not retried forever.
                    logger.error(u"Failed to purge container run %d at %r.",
                                 run.id,
                                 run.sandbox_path,
                                 exc_info=True)
                run.sandbox_path = ''
                run.save()
            elif entry_type == 'l':
                log = ContainerLog.objects.get(id=entry_id)
                entry_size = log.log_size
                entry_date = log.run.end_time
                logger.debug("Purged container log %d containing %s.",
                             log.id,
                             filesizeformat(entry_size))
                log.long_text.delete()
            else:
                assert entry_type == 'd'
                dataset = Dataset.objects.get(id=entry_id)
                entry_size = dataset.dataset_size
                # Only dataset_total is decremented during the loop; it is
                # reused in the final error report below.
                dataset_total -= dataset.dataset_size
                entry_date = dataset.date_created
                logger.debug("Purged dataset %d containing %s.",
                             dataset.pk,
                             filesizeformat(entry_size))
                dataset.dataset_file.delete()
            purge_counts[entry_type] += 1
            purge_counts[entry_type + ' bytes'] += entry_size
            # PyCharm false positives...
            # noinspection PyUnresolvedReferences
            min_purge_dates[entry_type] = min(entry_date,
                                              min_purge_dates.get(entry_type,
                                                                  entry_date))
            # noinspection PyUnresolvedReferences
            max_purge_dates[entry_type] = max(entry_date,
                                              max_purge_dates.get(entry_type,
                                                                  entry_date))
            remaining_storage -= entry_size
            if remaining_storage <= stop:
                break
        if entry_count == 0:
            # Nothing purgeable left, even though we're above the target.
            break
    # Summarize what was purged, one log line per category.
    for entry_type, entry_name in (('r', 'container run'),
                                   ('l', 'container log'),
                                   ('d', 'dataset')):
        purged_count = purge_counts[entry_type]
        if not purged_count:
            continue
        min_purge_date = min_purge_dates[entry_type]
        max_purge_date = max_purge_dates[entry_type]
        collective = entry_name + pluralize(purged_count)
        bytes_removed = purge_counts[entry_type + ' bytes']
        start_text = naturaltime(min_purge_date)
        end_text = naturaltime(max_purge_date)
        date_range = (start_text
                      if start_text == end_text
                      else start_text + ' to ' + end_text)
        logger.info("Purged %d %s containing %s from %s.",
                    purged_count,
                    collective,
                    filesizeformat(bytes_removed),
                    date_range)
    if remaining_storage > stop:
        # NOTE(review): the earlier summarize_storage call passes four
        # totals; this one passes two — confirm against the method's
        # signature and defaults.
        storage_text = self.summarize_storage(container_total,
                                              dataset_total)
        logger.error('Cannot reduce storage to %s: %s.',
                     filesizeformat(stop),
                     storage_text)