def process_event(self, event, last_event=None):
    """See :meth:`job.clock.ClockEventProcessor.process_event`.

    Compares the new event with the last event and schedules any missing daily metrics jobs.
    """

    # Attempt to get the daily metrics job type
    # Note: filter().last() returns None instead of raising DoesNotExist when the job type is missing
    job_type = JobType.objects.filter(name='scale-daily-metrics').last()
    if not job_type:
        raise ClockEventError('Missing required job type: scale-daily-metrics')

    if last_event:
        # Build a list of days that require metrics
        day_count = xrange((event.occurred.date() - last_event.occurred.date()).days)
        days = [last_event.occurred.date() + datetime.timedelta(days=d) for d in day_count]
    else:
        # Use the previous day when first triggered
        days = [timezone.now().date() - datetime.timedelta(days=1)]

    # Schedule one job for each required day
    for day in days:
        job_data = JobData()
        job_data.add_property_input('Day', day.strftime('%Y-%m-%d'))
        Queue.objects.queue_new_job(job_type, job_data, event)
def test_json(self):
    """Tests converting a CancelJobs message to and from JSON"""

    when = now()
    data = JobData()
    job_type = job_test_utils.create_seed_job_type()
    job_1 = job_test_utils.create_job(job_type=job_type, status='PENDING')
    job_2 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='FAILED', input=data.get_dict())
    job_ids = [job_1.id, job_2.id]

    # Add jobs to message
    message = CancelJobs()
    message.when = when
    if message.can_fit_more():
        message.add_job(job_1.id)
    if message.can_fit_more():
        message.add_job(job_2.id)

    # Convert message to JSON and back, and then execute
    message_json_dict = message.to_json()
    new_message = CancelJobs.from_json(message_json_dict)
    result = new_message.execute()
    self.assertTrue(result)

    jobs = Job.objects.filter(id__in=job_ids).order_by('id')
    # Both jobs should have been canceled
    self.assertEqual(jobs[0].status, 'CANCELED')
    self.assertEqual(jobs[0].last_status_change, when)
    self.assertEqual(jobs[1].status, 'CANCELED')
    self.assertEqual(jobs[1].last_status_change, when)
    # No new messages since these jobs do not belong to a recipe
    self.assertEqual(len(new_message.new_messages), 0)
def queue_scan(self, scan_id, dry_run=True):
    """Retrieves a Scan model and uses metadata to place a job to run the Scan process on the queue. All changes to
    the database will occur in an atomic transaction.

    :param scan_id: The unique identifier of the Scan process.
    :type scan_id: int
    :param dry_run: Whether the scan will execute as a dry run
    :type dry_run: bool
    :returns: The new Scan process
    :rtype: :class:`ingest.models.Scan`
    """

    scan = Scan.objects.select_for_update().get(pk=scan_id)
    scan_type = self.get_scan_job_type()

    job_data = JobData()
    job_data.add_property_input('Scan ID', str(scan.id))
    job_data.add_property_input('Dry Run', str(dry_run))
    event_description = {'scan_id': scan.id}

    if scan.job:
        raise ScanIngestJobAlreadyLaunched

    if dry_run:
        event = TriggerEvent.objects.create_trigger_event('DRY_RUN_SCAN_CREATED', None, event_description, now())
        scan.dry_run_job = Queue.objects.queue_new_job(scan_type, job_data, event)
    else:
        event = TriggerEvent.objects.create_trigger_event('SCAN_CREATED', None, event_description, now())
        scan.job = Queue.objects.queue_new_job(scan_type, job_data, event)

    scan.save()
    return scan
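A minimal usage sketch for the method above. It assumes queue_scan() is exposed on the Scan model manager and that a Scan with the given id already exists; the manager attribute and the id value are illustrative assumptions, not taken from the snippet.

# Illustrative only: queue an existing Scan as a dry run inside a transaction,
# since queue_scan() uses select_for_update() and expects atomic database changes.
from django.db import transaction

with transaction.atomic():
    scan = Scan.objects.queue_scan(scan_id=42, dry_run=True)   # manager attribute and id are assumptions
    dry_run_job = scan.dry_run_job                             # the job queued for the dry run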
def _start_ingest_task(self, ingest):
    """Starts a task for the given ingest in an atomic transaction

    :param ingest: The ingest model
    :type ingest: :class:`ingest.models.Ingest`
    """

    logger.info('Creating ingest task for %s', ingest.file_name)

    # Create new ingest job and mark ingest as QUEUED
    ingest_job_type = Ingest.objects.get_ingest_job_type()
    data = JobData()
    data.add_property_input('Ingest ID', str(ingest.id))
    desc = {'strike_id': self.strike_id, 'file_name': ingest.file_name}
    when = ingest.transfer_ended if ingest.transfer_ended else now()
    event = TriggerEvent.objects.create_trigger_event('STRIKE_TRANSFER', None, desc, when)

    job_configuration = JobConfiguration()
    if ingest.workspace:
        job_configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)
    if ingest.new_workspace:
        job_configuration.add_job_task_workspace(ingest.new_workspace.name, MODE_RW)

    ingest_job = Queue.objects.queue_new_job(ingest_job_type, data, event, job_configuration)

    ingest.job = ingest_job
    ingest.status = 'QUEUED'
    ingest.save()

    logger.info('Successfully created ingest task for %s', ingest.file_name)
def test_json(self):
    """Tests converting a RequeueJobsBulk message to and from JSON"""

    sys_err = error_test_utils.create_error(category='SYSTEM')

    data = JobData()
    job_type = job_test_utils.create_job_type()
    job_1 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='FAILED', error=sys_err,
                                      input=data.get_dict())
    job_2 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='CANCELED', error=sys_err,
                                      input=data.get_dict())

    # Create message
    message = RequeueJobsBulk()
    message.started = job_1.last_modified - timedelta(seconds=1)
    message.ended = job_1.last_modified + timedelta(seconds=1)
    message.error_categories = ['SYSTEM']
    message.error_ids = [sys_err.id]
    message.job_ids = [job_1.id]
    message.job_type_ids = [job_type.id]
    message.priority = 1
    message.status = 'FAILED'

    # Convert message to JSON and back, and then execute
    message_json_dict = message.to_json()
    new_message = RequeueJobsBulk.from_json(message_json_dict)
    result = new_message.execute()
    self.assertTrue(result)

    # Should be one re-queue message for job 1
    self.assertEqual(len(new_message.new_messages), 1)
    message = new_message.new_messages[0]
    self.assertEqual(message.type, 'requeue_jobs')
    self.assertListEqual(message._requeue_jobs, [QueuedJob(job_1.id, job_1.num_exes)])
    self.assertEqual(message.priority, 1)
def _handle_job_finished(self, job_exe):
    """Handles a job execution finishing (reaching a final status of COMPLETED, FAILED, or CANCELED). The caller
    must have obtained a model lock on the given job_exe model. All database changes occur in an atomic
    transaction.

    :param job_exe: The job execution that finished
    :type job_exe: :class:`job.models.JobExecution`
    """

    if not job_exe.is_finished:
        raise Exception('Job execution is not finished in status %s' % job_exe.status)

    # Start a cleanup job if this execution requires it
    if job_exe.requires_cleanup:
        if job_exe.cleanup_job:
            raise Exception('Job execution already has a cleanup job')

        cleanup_type = JobType.objects.get_cleanup_job_type()
        data = JobData()
        data.add_property_input('Job Exe ID', str(job_exe.id))
        desc = {'job_exe_id': job_exe.id, 'node_id': job_exe.node_id}
        event = TriggerEvent.objects.create_trigger_event('CLEANUP', None, desc, timezone.now())
        cleanup_job_id = Queue.objects.queue_new_job(cleanup_type, data, event).id
        job_exe.cleanup_job_id = cleanup_job_id
        job_exe.save()
def populate_job_configuration(apps, schema_editor):
    from job.configuration.configuration.job_configuration import JobConfiguration, MODE_RO, MODE_RW
    from job.configuration.data.job_data import JobData

    # Go through all of the job models that have job data and populate their configuration
    Job = apps.get_model("job", "Job")
    ScaleFile = apps.get_model("storage", "ScaleFile")
    Workspace = apps.get_model("storage", "Workspace")

    total_count = Job.objects.all().count()
    workspaces = {}
    for workspace in Workspace.objects.all().iterator():
        workspaces[workspace.id] = workspace

    print "Populating new configuration field for %s jobs" % str(total_count)
    done_count = 0
    batch_size = 1000
    while done_count < total_count:
        percent = (float(done_count) / float(total_count)) * 100.00
        print "Completed %s of %s jobs (%f%%)" % (done_count, total_count, percent)
        batch_end = done_count + batch_size
        for job in Job.objects.select_related("job_type").order_by("id")[done_count:batch_end]:
            # Ignore jobs that don't have their job data populated yet
            if not job.data:
                continue
            data = JobData(job.data)
            input_file_ids = data.get_input_file_ids()
            input_files = ScaleFile.objects.filter(id__in=input_file_ids).select_related("workspace").iterator()
            input_workspaces = set()
            for input_file in input_files:
                input_workspaces.add(input_file.workspace.name)

            configuration = JobConfiguration()
            for name in input_workspaces:
                configuration.add_job_task_workspace(name, MODE_RO)

            if not job.job_type.is_system:
                for name in input_workspaces:
                    configuration.add_pre_task_workspace(name, MODE_RO)
                    # We add input workspaces to post task so it can perform a parse results move if requested by
                    # the job's results manifest
                    configuration.add_post_task_workspace(name, MODE_RW)
                for workspace_id in data.get_output_workspace_ids():
                    workspace = workspaces[workspace_id]
                    if workspace.name not in input_workspaces:
                        configuration.add_post_task_workspace(workspace.name, MODE_RW)
            elif job.job_type.name == "scale-ingest":
                ingest_id = data.get_property_values(["Ingest ID"])["Ingest ID"]
                from ingest.models import Ingest
                ingest = Ingest.objects.select_related("workspace").get(id=ingest_id)
                configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)

            job.configuration = configuration.get_dict()
            job.save()
        done_count += batch_size
    print "All %s jobs completed" % str(total_count)
def process_parse(self, source_file):
    '''Processes the given source file parse by creating the appropriate jobs if the rule is triggered. All
    database changes are made in an atomic transaction.

    :param source_file: The source file that was parsed
    :type source_file: :class:`source.models.SourceFile`
    '''

    # If this parse file has the correct media type or the correct data types, the rule is triggered
    media_type_match = not self._media_type or self._media_type == source_file.media_type
    data_types_match = not self._data_types or self._data_types <= source_file.get_data_type_tags()

    if not media_type_match or not data_types_match:
        return

    msg = 'Parse rule for '
    if not self._media_type:
        msg += 'all media types '
    else:
        msg += 'media type %s ' % self._media_type
    if self._data_types:
        msg += 'and data types %s ' % ','.join(self._data_types)
    msg += 'was triggered'
    logger.info(msg)

    event = ParseTriggerEvent(self._model, source_file).save_to_db()

    # Create triggered jobs
    for job in self._jobs_to_create:
        job_type = self._job_type_map[(job['job_type']['name'], job['job_type']['version'])]
        file_input_name = job['file_input_name']
        job_data = JobData({})
        job_data.add_file_input(file_input_name, source_file.id)

        # If workspace name has been provided, add that to the job data for each output file
        if 'workspace_name' in job:
            workspace = self._workspace_map[job['workspace_name']]
            job_type.get_job_interface().add_workspace_to_data(job_data, workspace.id)
        logger.info('Queuing new job of type %s %s', job_type.name, job_type.version)
        Queue.objects.queue_new_job(job_type, job_data.get_dict(), event)

    # Create triggered recipes
    for recipe in self._recipes_to_create:
        recipe_type = self._recipe_type_map[(recipe['recipe_type']['name'], recipe['recipe_type']['version'])]
        file_input_name = recipe['file_input_name']
        recipe_data = RecipeData({})
        recipe_data.add_file_input(file_input_name, source_file.id)

        # If workspace name has been provided, add that to the recipe data for each output file
        if 'workspace_name' in recipe:
            workspace = self._workspace_map[recipe['workspace_name']]
            recipe_data.set_workspace_id(workspace.id)
        logger.info('Queuing new recipe of type %s %s', recipe_type.name, recipe_type.version)
        Queue.objects.queue_new_recipe(recipe_type, recipe_data.get_dict(), event)
def populate_job_configuration(apps, schema_editor):
    from job.execution.configuration.json.exe_config import ExecutionConfiguration, MODE_RO, MODE_RW
    from job.configuration.data.job_data import JobData

    # Go through all of the job models that have job data and populate their configuration
    Job = apps.get_model('job', 'Job')
    ScaleFile = apps.get_model('storage', 'ScaleFile')
    Workspace = apps.get_model('storage', 'Workspace')

    total_count = Job.objects.all().count()
    workspaces = {}
    for workspace in Workspace.objects.all().iterator():
        workspaces[workspace.id] = workspace

    print 'Populating new configuration field for %s jobs' % str(total_count)
    done_count = 0
    batch_size = 1000
    while done_count < total_count:
        percent = (float(done_count) / float(total_count)) * 100.00
        print 'Completed %s of %s jobs (%f%%)' % (done_count, total_count, percent)
        batch_end = done_count + batch_size
        for job in Job.objects.select_related('job_type').order_by('id')[done_count:batch_end]:
            # Ignore jobs that don't have their job data populated yet
            if not job.data:
                continue
            data = JobData(job.data)
            input_file_ids = data.get_input_file_ids()
            input_files = ScaleFile.objects.filter(id__in=input_file_ids).select_related('workspace').iterator()
            input_workspaces = set()
            for input_file in input_files:
                input_workspaces.add(input_file.workspace.name)

            configuration = ExecutionConfiguration()
            for name in input_workspaces:
                configuration.add_job_task_workspace(name, MODE_RO)

            if not job.job_type.is_system:
                for name in input_workspaces:
                    configuration.add_pre_task_workspace(name, MODE_RO)
                    # We add input workspaces to post task so it can perform a parse results move if requested by
                    # the job's results manifest
                    configuration.add_post_task_workspace(name, MODE_RW)
                for workspace_id in data.get_output_workspace_ids():
                    workspace = workspaces[workspace_id]
                    if workspace.name not in input_workspaces:
                        configuration.add_post_task_workspace(workspace.name, MODE_RW)
            elif job.job_type.name == 'scale-ingest':
                ingest_id = data.get_property_values(['Ingest ID'])['Ingest ID']
                from ingest.models import Ingest
                ingest = Ingest.objects.select_related('workspace').get(id=ingest_id)
                configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)

            job.configuration = configuration.get_dict()
            job.save()
        done_count += batch_size
    print 'All %s jobs completed' % str(total_count)
def process_ingested_source_file(self, source_file, when):
    """Processes the given ingested source file by checking it against all ingest trigger rules and creating the
    corresponding jobs and recipes for any triggered rules. All database changes are made in an atomic transaction.

    :param source_file: The source file that was ingested
    :type source_file: :class:`source.models.SourceFile`
    :param when: When the source file was ingested
    :type when: :class:`datetime.datetime`
    """

    msg = 'Processing trigger rules for ingested source file with media type %s and data types %s'
    logger.info(msg, source_file.media_type, str(list(source_file.get_data_type_tags())))

    any_rules = False
    for entry in RecipeType.objects.get_active_trigger_rules(INGEST_TYPE):
        rule = entry[0]
        thing_to_create = entry[1]
        rule_config = rule.get_configuration()
        condition = rule_config.get_condition()

        if condition.is_condition_met(source_file):
            logger.info(condition.get_triggered_message())
            any_rules = True

            event = self._create_ingest_trigger_event(source_file, rule, when)
            workspace = Workspace.objects.get(name=rule_config.get_workspace_name())

            if isinstance(thing_to_create, JobType):
                job_type = thing_to_create
                job_data = JobData({})
                job_data.add_file_input(rule_config.get_input_data_name(), source_file.id)
                job_type.get_job_interface().add_workspace_to_data(job_data, workspace.id)
                logger.info('Queuing new job of type %s %s', job_type.name, job_type.version)
                Queue.objects.queue_new_job(job_type, job_data, event)
            elif isinstance(thing_to_create, RecipeType):
                recipe_type = thing_to_create
                recipe_data = RecipeData({})
                recipe_data.add_file_input(rule_config.get_input_data_name(), source_file.id)
                recipe_data.set_workspace_id(workspace.id)
                logger.info('Queuing new recipe of type %s %s', recipe_type.name, recipe_type.version)
                Queue.objects.queue_new_recipe(recipe_type, recipe_data, event)

    if not any_rules:
        logger.info('No rules triggered')
def create_batch(self, recipe_type, definition, title=None, description=None):
    """Creates a new batch that represents a group of recipes that should be scheduled for re-processing. This
    method also queues a new system job that will process the batch request. All database changes occur in an
    atomic transaction.

    :param recipe_type: The type of recipes that should be re-processed
    :type recipe_type: :class:`recipe.models.RecipeType`
    :param definition: The definition for running a batch
    :type definition: :class:`batch.configuration.definition.batch_definition.BatchDefinition`
    :param title: The human-readable name of the batch
    :type title: string
    :param description: An optional description of the batch
    :type description: string
    :returns: The newly created batch
    :rtype: :class:`batch.models.Batch`

    :raises :class:`batch.exceptions.BatchError`: If general batch parameters are invalid
    """

    # Attempt to get the batch job type
    # Note: filter().last() returns None instead of raising DoesNotExist when the job type is missing
    job_type = JobType.objects.filter(name='scale-batch-creator').last()
    if not job_type:
        raise BatchError('Missing required job type: scale-batch-creator')

    # Create an event to represent this request
    trigger_desc = {'user': '******'}
    event = TriggerEvent.objects.create_trigger_event('USER', None, trigger_desc, timezone.now())

    batch = Batch()
    batch.title = title
    batch.description = description
    batch.recipe_type = recipe_type
    batch.definition = definition.get_dict()
    batch.event = event
    batch.save()

    # Setup the job data to process the batch
    data = JobData()
    data.add_property_input('Batch ID', str(batch.id))

    # Schedule the batch job
    job = Queue.objects.queue_new_job(job_type, data, event)
    batch.creator_job = job
    batch.save()

    return batch
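A hedged usage sketch for create_batch(): the Batch.objects manager attribute, the recipe type name, and the definition contents are illustrative assumptions; only the BatchDefinition import path comes from the docstring above.

# Illustrative only: build a batch that re-processes recipes of one type.
from batch.configuration.definition.batch_definition import BatchDefinition

recipe_type = RecipeType.objects.get(name='my-recipe-type')   # illustrative recipe type name
definition = BatchDefinition({'version': '1.0'})              # illustrative definition dict
batch = Batch.objects.create_batch(recipe_type, definition, title='Re-run', description='Example batch')
# batch.creator_job now references the queued 'scale-batch-creator' job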
def start_ingest_tasks(self, ingests, scan_id=None, strike_id=None):
    """Starts a batch of ingest tasks for the given ingest models in an atomic transaction. One of scan_id or
    strike_id must be set.

    :param ingests: The ingest models
    :type ingests: list[:class:`ingest.models.Ingest`]
    :param scan_id: ID of the Scan that generated the ingests
    :type scan_id: int
    :param strike_id: ID of the Strike that generated the ingests
    :type strike_id: int
    """

    # Create new ingest jobs and mark each ingest as QUEUED
    ingest_job_type = Ingest.objects.get_ingest_job_type()

    for ingest in ingests:
        logger.debug('Creating ingest task for %s', ingest.file_name)

        when = ingest.transfer_ended if ingest.transfer_ended else now()
        desc = {'file_name': ingest.file_name}

        if scan_id:
            # Use result from query to get ingest ID
            # We need to find the id of each ingest that was created.
            # Using scan_id and file_name together as a unique composite key
            ingest_id = Ingest.objects.get(scan_id=ingest.scan_id, file_name=ingest.file_name).id
            desc['scan_id'] = scan_id
            event = TriggerEvent.objects.create_trigger_event('SCAN_TRANSFER', None, desc, when)
        elif strike_id:
            ingest_id = ingest.id
            desc['strike_id'] = strike_id
            event = TriggerEvent.objects.create_trigger_event('STRIKE_TRANSFER', None, desc, when)
        else:
            raise Exception('One of scan_id or strike_id must be set')

        data = JobData()
        data.add_property_input('Ingest ID', str(ingest_id))

        exe_configuration = ExecutionConfiguration()
        if ingest.workspace:
            exe_configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)
        if ingest.new_workspace:
            exe_configuration.add_job_task_workspace(ingest.new_workspace.name, MODE_RW)

        ingest_job = Queue.objects.queue_new_job(ingest_job_type, data, event, exe_configuration)

        ingest.job = ingest_job
        ingest.status = 'QUEUED'
        ingest.save()

        logger.debug('Successfully created ingest task for %s', ingest.file_name)
def test_successful(self):
    """Tests calling JobData.add_property_input() successfully."""

    data = {'input_data': []}
    job_data = JobData(data)

    # Method to test, we will test it by calling validate below
    job_data.add_property_input('Param1', 'Value1')

    properties = {'Param1': True}

    # No exception is success
    warnings = JobData(data).validate_properties(properties)
    self.assertFalse(warnings)

def test_successful(self, mock_store):
    """Tests calling JobData.add_output() successfully."""

    data = {'output_data': []}
    job_data = JobData(data)

    # Method to test, we will test it by calling validate below
    job_data.add_output('File1', 1)

    files = ['File1']

    # No exception is success
    warnings = JobData(data).validate_output_files(files)
    self.assertFalse(warnings)
def process_ingest(self, ingest, source_file_id):
    """Processes the given source file ingest by creating the appropriate jobs if the rule is triggered. All
    database changes are made in an atomic transaction.

    :param ingest: The ingest to process
    :type ingest: :class:`ingest.models.Ingest`
    :param source_file_id: The ID of the source file that was ingested
    :type source_file_id: long
    """

    # Only trigger when this ingest file has the correct media type and ingest types
    if self._media_type and self._media_type != ingest.media_type:
        return
    if not self._data_types.issubset(ingest.get_data_type_tags()):
        return

    if not self._media_type:
        logger.info("Ingest rule for all media types was triggered")
    else:
        logger.info("Ingest rule for media type %s was triggered", self._media_type)

    event = IngestTriggerEvent(self._model, ingest).save_to_db()

    # Create triggered jobs
    for job in self._jobs_to_create:
        job_type = self._job_type_map[(job["job_type"]["name"], job["job_type"]["version"])]
        file_input_name = job["file_input_name"]
        job_data = JobData({})
        job_data.add_file_input(file_input_name, source_file_id)

        # If workspace name has been provided, add that to the job data for each output file
        if "workspace_name" in job:
            workspace = self._workspace_map[job["workspace_name"]]
            job_type.get_job_interface().add_workspace_to_data(job_data, workspace.id)
        logger.info("Queuing new job of type %s %s", job_type.name, job_type.version)
        Queue.objects.queue_new_job(job_type, job_data.get_dict(), event)

    # Create triggered recipes
    for recipe in self._recipes_to_create:
        recipe_type = self._recipe_type_map[(recipe["recipe_type"]["name"], recipe["recipe_type"]["version"])]
        file_input_name = recipe["file_input_name"]
        recipe_data = RecipeData({})
        recipe_data.add_file_input(file_input_name, source_file_id)

        # If workspace name has been provided, add that to the recipe data for each output file
        if "workspace_name" in recipe:
            workspace = self._workspace_map[recipe["workspace_name"]]
            recipe_data.set_workspace_id(workspace.id)
        logger.info("Queuing new recipe of type %s %s", recipe_type.name, recipe_type.version)
        Queue.objects.queue_new_recipe(recipe_type, recipe_data.get_dict(), event)
def test_successful(self):
    """Tests calling JobData.add_file_list_input() successfully."""

    data = {'input_data': []}
    job_data = JobData(data)

    # Method to test, we will test it by calling validate below
    job_data.add_file_list_input('File1', [self.file_1.id])

    file_desc_1 = ScaleFileDescription()
    file_desc_1.add_allowed_media_type('application/json')
    files = {'File1': (True, True, file_desc_1)}

    # No exception is success
    warnings = JobData(data).validate_input_files(files)
    self.assertFalse(warnings)
def test_json(self):
    """Tests converting a CancelJobsBulk message to and from JSON"""

    sys_err = error_test_utils.create_error(category='SYSTEM')

    data = JobData()
    batch = batch_test_utils.create_batch()
    recipe = recipe_test_utils.create_recipe()
    job_type = job_test_utils.create_job_type()
    job_1 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='FAILED', error=sys_err,
                                      input=data.get_dict())
    job_1.batch_id = batch.id
    job_1.recipe_id = recipe.id
    job_1.save()
    job_2 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='FAILED', error=sys_err,
                                      input=data.get_dict())

    # Create message
    message = CancelJobsBulk()
    message.started = job_1.last_modified - timedelta(seconds=1)
    message.ended = job_1.last_modified + timedelta(seconds=1)
    message.error_categories = ['SYSTEM']
    message.error_ids = [sys_err.id]
    message.job_ids = [job_1.id]
    message.job_type_ids = [job_type.id]
    message.status = 'FAILED'
    message.job_type_names = [job_type.name]
    message.batch_ids = [batch.id]
    message.recipe_ids = [recipe.id]
    message.is_superseded = False

    # Convert message to JSON and back, and then execute
    message_json_dict = message.to_json()
    new_message = CancelJobsBulk.from_json(message_json_dict)
    result = new_message.execute()
    self.assertTrue(result)

    # Should be one cancel message for job 1
    self.assertEqual(len(new_message.new_messages), 1)
    message = new_message.new_messages[0]
    self.assertEqual(message.type, 'cancel_jobs')
    self.assertListEqual(message._job_ids, [job_1.id])
def test_workspace_not_active(self, mock_store):
    """Tests calling JobData.validate_output_files() with a workspace that is not active"""

    data = {'output_data': [{'name': 'File1', 'workspace_id': 3}]}
    files = ['File1']
    self.assertRaises(InvalidData, JobData(data).validate_output_files, files)

def test_workspace_id_not_integer(self, mock_store):
    """Tests calling JobData.validate_output_files() when an output has a non-integral value for workspace_id"""

    data = {'output_data': [{'name': 'File1', 'workspace_id': 'foo'}]}
    files = ['File1']
    self.assertRaises(InvalidData, JobData(data).validate_output_files, files)

def test_missing_workspace_id(self, mock_store):
    """Tests calling JobData.validate_output_files() when an output is missing the workspace_id field"""

    data = {'output_data': [{'name': 'File1'}]}
    files = ['File1']
    self.assertRaises(InvalidData, JobData(data).validate_output_files, files)

def test_missing_output(self, mock_store):
    """Tests calling JobData.validate_output_files() when an output is missing"""

    data = {'output_data': []}
    files = ['File1']
    self.assertRaises(InvalidData, JobData(data).validate_output_files, files)

def test_bad_file_id(self):
    """Tests calling JobData.validate_input_files() with a file that has an invalid ID"""

    data = {'input_data': [{'name': 'File1', 'file_id': 9999999999}]}
    files = {'File1': (True, False, ScaleFileDescription())}
    self.assertRaises(InvalidData, JobData(data).validate_input_files, files)

def test_single_non_integral(self):
    """Tests calling JobData.validate_input_files() with a single file param and non-integral file_id field"""

    data = {'input_data': [{'name': 'File1', 'file_id': 'STRING'}]}
    files = {'File1': (True, False, ScaleFileDescription())}
    self.assertRaises(InvalidData, JobData(data).validate_input_files, files)

def test_multiple_non_list(self):
    """Tests calling JobData.validate_input_files() with a multiple file param with a non-list for file_ids field"""

    data = {'input_data': [{'name': 'File1', 'file_ids': 'STRING'}]}
    files = {'File1': (True, True, ScaleFileDescription())}
    self.assertRaises(InvalidData, JobData(data).validate_input_files, files)

def test_missing_required(self):
    """Tests calling JobData.validate_input_files() when a file is required, but missing"""

    data = {'input_data': []}
    files = {'File1': (True, True, ScaleFileDescription())}
    self.assertRaises(InvalidData, JobData(data).validate_input_files, files)
def test_init_successful_one_property(self):
    """Tests calling JobData constructor successfully with a single property input."""

    data = {'input_data': [{'name': 'My Name', 'value': '1'}]}

    # No exception is success
    JobData(data)

def test_missing_required(self):
    """Tests calling JobData.validate_properties() when a property is required, but missing"""

    data = {'input_data': []}
    properties = {'Param1': True}
    self.assertRaises(InvalidData, JobData(data).validate_properties, properties)

def test_missing_value(self):
    """Tests calling JobData.validate_properties() when a property is missing a value"""

    data = {'input_data': [{'name': 'Param1'}]}
    properties = {'Param1': False}
    self.assertRaises(InvalidData, JobData(data).validate_properties, properties)

def test_value_not_string(self):
    """Tests calling JobData.validate_properties() when a property has a non-string value"""

    data = {'input_data': [{'name': 'Param1', 'value': 123}]}
    properties = {'Param1': False}
    self.assertRaises(InvalidData, JobData(data).validate_properties, properties)
def test_successful(self):
    """Tests calling JobData.get_property_values() successfully"""

    data = {'input_data': [{'name': 'Param1', 'value': 'Value1'},
                           {'name': 'Param2', 'file_id': 1},
                           {'name': 'Param3', 'value': 'Value3'},
                           {'name': 'Param5', 'value': 'Value5'}]}
    property_names = ['Param1', 'Param3', 'Param4']

    property_values = JobData(data).get_property_values(property_names)
    self.assertDictEqual(property_values, {'Param1': 'Value1', 'Param3': 'Value3'})

def test_successful(self):
    """Tests calling JobData.get_input_file_ids() successfully"""

    data = {'input_data': [{'name': 'Param1', 'value': 'Value1'},
                           {'name': 'Param2', 'file_id': 1},
                           {'name': 'Param3', 'file_ids': [5, 7, 23]},
                           {'name': 'Param4', 'file_id': 1},
                           {'name': 'Param5', 'value': 'Value5'}]}

    file_ids = JobData(data).get_input_file_ids()
    self.assertSetEqual(set(file_ids), set([1, 5, 7, 23]))

def test_single_missing_file_id(self):
    """Tests calling JobData.validate_input_files() with a single file param missing the file_id field"""

    data = {'input_data': [{'name': 'File1'}]}
    files = {'File1': (True, False, ScaleFileDescription())}
    self.assertRaises(InvalidData, JobData(data).validate_input_files, files)
def test_files_in_command(self, mock_retrieve_call, mock_os_mkdir, mock_isdir):
    def new_retrieve(arg1):
        return {
            'files1_out': ['/test/file1/foo.txt', '/test/file1/bar.txt'],
        }

    mock_retrieve_call.side_effect = new_retrieve
    job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
    job_interface_dict['command_arguments'] = '${files1}'
    job_interface_dict['input_data'] = [{
        'name': 'files1',
        'type': 'files',
        'required': True,
    }]
    job_data_dict['input_data'].append({
        'name': 'files1',
        'file_ids': [1, 2, 3],
    })
    job_data_dict['output_data'].append({
        'name': 'files1_out',
        'workspace_id': self.workspace.id,
    })

    job_interface = JobInterface(job_interface_dict)
    job_data = JobData(job_data_dict)
    job_environment = job_environment_dict
    job_exe_id = 1

    job_interface.perform_pre_steps(job_data, job_environment)
    job_command_arguments = job_interface.fully_populate_command_argument(job_data, job_environment, job_exe_id)
    expected_command_arguments = os.path.join(SCALE_JOB_EXE_INPUT_PATH, 'files1')
    self.assertEqual(job_command_arguments, expected_command_arguments,
                     'expected a different command from pre_steps')
def test_file_in_command(self, mock_retrieve_call, mock_os_mkdir, mock_get_one_file, mock_isdir):
    job_exe_id = 1

    def new_retrieve(arg1):
        return {
            'file1_out': [input_file_path],
        }

    input_file_path = os.path.join(SCALE_JOB_EXE_INPUT_PATH, 'file1', 'foo.txt')
    mock_retrieve_call.side_effect = new_retrieve
    mock_get_one_file.side_effect = lambda (arg1): input_file_path
    job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
    job_interface_dict['command_arguments'] = '${file1}'
    job_interface_dict['input_data'] = [{
        'name': 'file1',
        'type': 'file',
        'required': True,
    }]
    job_data_dict['input_data'].append({
        'name': 'file1',
        'file_id': self.file.id,
    })
    job_data_dict['output_data'].append({
        'name': 'file1_out',
        'workspace_id': self.workspace.id,
    })

    job_interface = JobInterface(job_interface_dict)
    job_data = JobData(job_data_dict)
    job_environment = job_environment_dict

    job_interface.perform_pre_steps(job_data, job_environment)
    job_command_arguments = job_interface.fully_populate_command_argument(job_data, job_environment, job_exe_id)
    self.assertEqual(job_command_arguments, input_file_path, 'expected a different command from pre_steps')
def create_strike(self, name, title, description, configuration):
    """Creates a new Strike process with the given configuration and returns the new Strike model. The Strike
    model will be saved in the database and the job to run the Strike process will be placed on the queue. All
    changes to the database will occur in an atomic transaction.

    :param name: The identifying name of this Strike process
    :type name: string
    :param title: The human-readable name of this Strike process
    :type title: string
    :param description: A description of this Strike process
    :type description: string
    :param configuration: The Strike configuration
    :type configuration: dict
    :returns: The new Strike process
    :rtype: :class:`ingest.models.Strike`

    :raises :class:`ingest.strike.configuration.exceptions.InvalidStrikeConfiguration`: If the configuration is
        invalid.
    """

    # Validate the configuration, no exception is success
    config = StrikeConfiguration(configuration)
    config.validate()

    strike = Strike()
    strike.name = name
    strike.title = title
    strike.description = description
    strike.configuration = config.get_dict()
    strike.save()

    strike_type = self.get_strike_job_type()
    job_data = JobData()
    job_data.add_property_input('Strike ID', unicode(strike.id))
    event_description = {'strike_id': strike.id}
    event = TriggerEvent.objects.create_trigger_event('STRIKE_CREATED', None, event_description, now())
    strike.job = Queue.objects.queue_new_job(strike_type, job_data, event)
    strike.save()

    return strike
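A hedged usage sketch for create_strike(): the Strike.objects manager attribute and the configuration placeholder are illustrative assumptions; a real call needs a configuration dict that passes StrikeConfiguration validation.

# Illustrative only: create a Strike process and queue its monitoring job.
config = {'version': '1.0'}   # placeholder; must be a valid Strike configuration in practice
strike = Strike.objects.create_strike(name='my-strike',
                                      title='My Strike',
                                      description='Watches a workspace for new files',
                                      configuration=config)
# strike.job now references the queued Strike process job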