Example #1
    def process_event(self, event, last_event=None):
        """See :meth:`job.clock.ClockEventProcessor.process_event`.

        Compares the new event with the last event and schedules any missing metrics jobs.
        """

        # Attempt to get the daily metrics job type
        try:
            job_type = JobType.objects.filter(name='scale-daily-metrics').last()
        except JobType.DoesNotExist:
            raise ClockEventError('Missing required job type: scale-daily-metrics')

        if last_event:
            # Build a list of days that require metrics
            day_count = xrange((event.occurred.date() - last_event.occurred.date()).days)
            days = [last_event.occurred.date() + datetime.timedelta(days=d) for d in day_count]
        else:
            # Use the previous day when first triggered
            days = [timezone.now().date() - datetime.timedelta(days=1)]

        # Schedule one job for each required day
        for day in days:
            job_data = JobData()
            job_data.add_property_input('Day', day.strftime('%Y-%m-%d'))
            Queue.objects.queue_new_job(job_type, job_data, event)
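The example above shows the queuing idiom that recurs throughout this page: build a JobData, add property inputs as plain strings, and hand it to Queue.objects.queue_new_job together with a job type and a trigger event. A minimal sketch of that idiom follows; the job_type and event objects and the Queue import path are assumptions here, not taken from the example.

    # Minimal sketch of the queuing idiom above. `job_type` is assumed to be an
    # existing JobType model and `event` an existing TriggerEvent model.
    from job.configuration.data.job_data import JobData  # path as in the migration examples below
    from queue.models import Queue  # import path assumed

    job_data = JobData()
    job_data.add_property_input('Day', '2017-01-01')  # property values are passed as strings
    Queue.objects.queue_new_job(job_type, job_data, event)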
Example #2
    def test_json(self):
        """Tests coverting a CancelJobs message to and from JSON"""

        when = now()
        data = JobData()
        job_type = job_test_utils.create_seed_job_type()
        job_1 = job_test_utils.create_job(job_type=job_type, status='PENDING')
        job_2 = job_test_utils.create_job(job_type=job_type,
                                          num_exes=3,
                                          status='FAILED',
                                          input=data.get_dict())
        job_ids = [job_1.id, job_2.id]

        # Add jobs to message
        message = CancelJobs()
        message.when = when
        if message.can_fit_more():
            message.add_job(job_1.id)
        if message.can_fit_more():
            message.add_job(job_2.id)

        # Convert message to JSON and back, and then execute
        message_json_dict = message.to_json()
        new_message = CancelJobs.from_json(message_json_dict)
        result = new_message.execute()

        self.assertTrue(result)
        jobs = Job.objects.filter(id__in=job_ids).order_by('id')
        # Both jobs should have been canceled
        self.assertEqual(jobs[0].status, 'CANCELED')
        self.assertEqual(jobs[0].last_status_change, when)
        self.assertEqual(jobs[1].status, 'CANCELED')
        self.assertEqual(jobs[1].last_status_change, when)
        # No new messages since these jobs do not belong to a recipe
        self.assertEqual(len(new_message.new_messages), 0)
Example #3
    def queue_scan(self, scan_id, dry_run=True):
        """Retrieves a Scan model and uses metadata to place a job to run the
        Scan process on the queue. All changes to the database will occur in an
        atomic transaction.

        :param scan_id: The unique identifier of the Scan process.
        :type scan_id: int
        :param dry_run: Whether the scan will execute as a dry run
        :type dry_run: bool
        :returns: The new Scan process
        :rtype: :class:`ingest.models.Scan`
        """

        scan = Scan.objects.select_for_update().get(pk=scan_id)
        scan_type = self.get_scan_job_type()

        job_data = JobData()
        job_data.add_property_input('Scan ID', str(scan.id))
        job_data.add_property_input('Dry Run', str(dry_run))
        event_description = {'scan_id': scan.id}

        if scan.job:
            raise ScanIngestJobAlreadyLaunched

        if dry_run:
            event = TriggerEvent.objects.create_trigger_event('DRY_RUN_SCAN_CREATED', None, event_description, now())
            scan.dry_run_job = Queue.objects.queue_new_job(scan_type, job_data, event)
        else:
            event = TriggerEvent.objects.create_trigger_event('SCAN_CREATED', None, event_description, now())
            scan.job = Queue.objects.queue_new_job(scan_type, job_data, event)

        scan.save()
 
        return scan
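The docstring above says all changes occur in an atomic transaction, and the method locks the row with select_for_update(), so a caller would presumably invoke it inside a transaction. A hypothetical call site, assuming the method is exposed on the Scan model's manager, might look like this:

    # Hypothetical call site for queue_scan(); the manager location and the
    # transaction handling are assumptions, not shown in the example above.
    from django.db import transaction
    from ingest.models import Scan

    scan_id = 12  # hypothetical primary key of an existing Scan
    with transaction.atomic():
        scan = Scan.objects.queue_scan(scan_id, dry_run=True)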
Example #4
    def _start_ingest_task(self, ingest):
        """Starts a task for the given ingest in an atomic transaction

        :param ingest: The ingest model
        :type ingest: :class:`ingest.models.Ingest`
        """

        logger.info('Creating ingest task for %s', ingest.file_name)

        # Create new ingest job and mark ingest as QUEUED
        ingest_job_type = Ingest.objects.get_ingest_job_type()
        data = JobData()
        data.add_property_input('Ingest ID', str(ingest.id))
        desc = {'strike_id': self.strike_id, 'file_name': ingest.file_name}
        when = ingest.transfer_ended if ingest.transfer_ended else now()
        event = TriggerEvent.objects.create_trigger_event('STRIKE_TRANSFER', None, desc, when)
        job_configuration = JobConfiguration()
        if ingest.workspace:
            job_configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)
        if ingest.new_workspace:
            job_configuration.add_job_task_workspace(ingest.new_workspace.name, MODE_RW)
        ingest_job = Queue.objects.queue_new_job(ingest_job_type, data, event, job_configuration)

        ingest.job = ingest_job
        ingest.status = 'QUEUED'
        ingest.save()

        logger.info('Successfully created ingest task for %s', ingest.file_name)
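Example #5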
    def test_json(self):
        """Tests coverting a RequeueJobsBulk message to and from JSON"""

        sys_err = error_test_utils.create_error(category='SYSTEM')

        data = JobData()
        job_type = job_test_utils.create_job_type()
        job_1 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='FAILED', error=sys_err,
                                          input=data.get_dict())
        job_2 = job_test_utils.create_job(job_type=job_type, num_exes=3, status='CANCELED', error=sys_err,
                                          input=data.get_dict())

        # Create message
        message = RequeueJobsBulk()
        message.started = job_1.last_modified - timedelta(seconds=1)
        message.ended = job_1.last_modified + timedelta(seconds=1)
        message.error_categories = ['SYSTEM']
        message.error_ids = [sys_err.id]
        message.job_ids = [job_1.id]
        message.job_type_ids = [job_type.id]
        message.priority = 1
        message.status = 'FAILED'

        # Convert message to JSON and back, and then execute
        message_json_dict = message.to_json()
        new_message = RequeueJobsBulk.from_json(message_json_dict)
        result = new_message.execute()

        self.assertTrue(result)
        # Should be one re-queue message for job 1
        self.assertEqual(len(new_message.new_messages), 1)
        message = new_message.new_messages[0]
        self.assertEqual(message.type, 'requeue_jobs')
        self.assertListEqual(message._requeue_jobs, [QueuedJob(job_1.id, job_1.num_exes)])
        self.assertEqual(message.priority, 1)
Example #6
    def _handle_job_finished(self, job_exe):
        """Handles a job execution finishing (reaching a final status of COMPLETED, FAILED, or CANCELED). The caller
        must have obtained a model lock on the given job_exe model. All database changes occur in an atomic transaction.

        :param job_exe: The job execution that finished
        :type job_exe: :class:`job.models.JobExecution`
        """

        if not job_exe.is_finished:
            raise Exception('Job execution is not finished in status %s' %
                            job_exe.status)

        # Start a cleanup job if this execution requires it
        if job_exe.requires_cleanup:

            if job_exe.cleanup_job:
                raise Exception('Job execution already has a cleanup job')

            cleanup_type = JobType.objects.get_cleanup_job_type()
            data = JobData()
            data.add_property_input('Job Exe ID', str(job_exe.id))
            desc = {'job_exe_id': job_exe.id, 'node_id': job_exe.node_id}
            event = TriggerEvent.objects.create_trigger_event(
                'CLEANUP', None, desc, timezone.now())
            cleanup_job_id = Queue.objects.queue_new_job(
                cleanup_type, data, event).id
            job_exe.cleanup_job_id = cleanup_job_id
            job_exe.save()
Example #7
    def process_event(self, event, last_event=None):
        """See :meth:`job.clock.ClockEventProcessor.process_event`.

        Compares the new event with the last event and schedules any missing metrics jobs.
        """

        # Attempt to get the daily metrics job type
        try:
            job_type = JobType.objects.filter(
                name='scale-daily-metrics').last()
        except JobType.DoesNotExist:
            raise ClockEventError(
                'Missing required job type: scale-daily-metrics')

        if last_event:
            # Build a list of days that require metrics
            day_count = xrange(
                (event.occurred.date() - last_event.occurred.date()).days)
            days = [
                last_event.occurred.date() + datetime.timedelta(days=d)
                for d in day_count
            ]
        else:
            # Use the previous day when first triggered
            days = [timezone.now().date() - datetime.timedelta(days=1)]

        # Schedule one job for each required day
        for day in days:
            job_data = JobData()
            job_data.add_property_input('Day', day.strftime('%Y-%m-%d'))
            Queue.objects.queue_new_job(job_type, job_data, event)
Example #8
    def _handle_job_finished(self, job_exe):
        """Handles a job execution finishing (reaching a final status of COMPLETED, FAILED, or CANCELED). The caller
        must have obtained a model lock on the given job_exe model. All database changes occur in an atomic transaction.

        :param job_exe: The job execution that finished
        :type job_exe: :class:`job.models.JobExecution`
        """

        if not job_exe.is_finished:
            raise Exception('Job execution is not finished in status %s' % job_exe.status)

        # Start a cleanup job if this execution requires it
        if job_exe.requires_cleanup:

            if job_exe.cleanup_job:
                raise Exception('Job execution already has a cleanup job')

            cleanup_type = JobType.objects.get_cleanup_job_type()
            data = JobData()
            data.add_property_input('Job Exe ID', str(job_exe.id))
            desc = {'job_exe_id': job_exe.id, 'node_id': job_exe.node_id}
            event = TriggerEvent.objects.create_trigger_event('CLEANUP', None, desc, timezone.now())
            cleanup_job_id = Queue.objects.queue_new_job(cleanup_type, data, event).id
            job_exe.cleanup_job_id = cleanup_job_id
            job_exe.save()
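Example #9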
    def populate_job_configuration(apps, schema_editor):
        from job.configuration.configuration.job_configuration import JobConfiguration, MODE_RO, MODE_RW
        from job.configuration.data.job_data import JobData

        # Go through all of the job models that have job data and populate their configuration
        Job = apps.get_model("job", "Job")
        ScaleFile = apps.get_model("storage", "ScaleFile")
        Workspace = apps.get_model("storage", "Workspace")
        total_count = Job.objects.all().count()
        workspaces = {}
        for workspace in Workspace.objects.all().iterator():
            workspaces[workspace.id] = workspace
        print "Populating new configuration field for %s jobs" % str(total_count)
        done_count = 0
        batch_size = 1000
        while done_count < total_count:
            percent = (float(done_count) / float(total_count)) * 100.00
            print "Completed %s of %s jobs (%f%%)" % (done_count, total_count, percent)
            batch_end = done_count + batch_size
            for job in Job.objects.select_related("job_type").order_by("id")[done_count:batch_end]:

                # Ignore jobs that don't have their job data populated yet
                if not job.data:
                    continue

                data = JobData(job.data)
                input_file_ids = data.get_input_file_ids()
                input_files = ScaleFile.objects.filter(id__in=input_file_ids).select_related("workspace").iterator()
                input_workspaces = set()
                for input_file in input_files:
                    input_workspaces.add(input_file.workspace.name)

                configuration = JobConfiguration()
                for name in input_workspaces:
                    configuration.add_job_task_workspace(name, MODE_RO)
                if not job.job_type.is_system:
                    for name in input_workspaces:
                        configuration.add_pre_task_workspace(name, MODE_RO)
                        # We add input workspaces to post task so it can perform a parse results move if requested by the
                        # job's results manifest
                        configuration.add_post_task_workspace(name, MODE_RW)
                    for workspace_id in data.get_output_workspace_ids():
                        workspace = workspaces[workspace_id]
                        if workspace.name not in input_workspaces:
                            configuration.add_post_task_workspace(workspace.name, MODE_RW)
                elif job.job_type.name == "scale-ingest":
                    ingest_id = data.get_property_values(["Ingest ID"])["Ingest ID"]
                    from ingest.models import Ingest

                    ingest = Ingest.objects.select_related("workspace").get(id=ingest_id)
                    configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)

                job.configuration = configuration.get_dict()
                job.save()
            done_count += batch_size
        print "All %s jobs completed" % str(total_count)
Example #10
    def process_parse(self, source_file):
        '''Processes the parse of the given source file by creating the appropriate jobs if the rule is triggered. All
        database changes are made in an atomic transaction.

        :param source_file: The source file that was parsed
        :type source_file: :class:`source.models.SourceFile`
        '''

        # The rule is triggered only if this parsed file matches both the configured media type and data types
        media_type_match = not self._media_type or self._media_type == source_file.media_type
        data_types_match = not self._data_types or self._data_types <= source_file.get_data_type_tags()

        if not media_type_match or not data_types_match:
            return

        msg = 'Parse rule for '
        if not self._media_type:
            msg += 'all media types '
        else:
            msg += 'media type %s ' % self._media_type
        if self._data_types:
            msg += 'and data types %s ' % ','.join(self._data_types)
        msg += 'was triggered'
        logger.info(msg)

        event = ParseTriggerEvent(self._model, source_file).save_to_db()

        # Create triggered jobs
        for job in self._jobs_to_create:
            job_type = self._job_type_map[(job['job_type']['name'], job['job_type']['version'])]
            file_input_name = job['file_input_name']
            job_data = JobData({})
            job_data.add_file_input(file_input_name, source_file.id)

            # If workspace name has been provided, add that to the job data for each output file
            if 'workspace_name' in job:
                workspace = self._workspace_map[job['workspace_name']]
                job_type.get_job_interface().add_workspace_to_data(job_data, workspace.id)
            logger.info('Queuing new job of type %s %s', job_type.name, job_type.version)
            Queue.objects.queue_new_job(job_type, job_data.get_dict(), event)

        # Create triggered recipes
        for recipe in self._recipes_to_create:
            recipe_type = self._recipe_type_map[(recipe['recipe_type']['name'], recipe['recipe_type']['version'])]
            file_input_name = recipe['file_input_name']
            recipe_data = RecipeData({})
            recipe_data.add_file_input(file_input_name, source_file.id)

            # If workspace name has been provided, add that to the recipe data for each output file
            if 'workspace_name' in recipe:
                workspace = self._workspace_map[recipe['workspace_name']]
                recipe_data.set_workspace_id(workspace.id)
            logger.info('Queuing new recipe of type %s %s', recipe_type.name, recipe_type.version)
            Queue.objects.queue_new_recipe(recipe_type, recipe_data.get_dict(), event)
Example #11
    def populate_job_configuration(apps, schema_editor):
        from job.execution.configuration.json.exe_config import ExecutionConfiguration, MODE_RO, MODE_RW
        from job.configuration.data.job_data import JobData
        # Go through all of the job models that have job data and populate their configuration
        Job = apps.get_model('job', 'Job')
        ScaleFile = apps.get_model('storage', 'ScaleFile')
        Workspace = apps.get_model('storage', 'Workspace')
        total_count = Job.objects.all().count()
        workspaces = {}
        for workspace in Workspace.objects.all().iterator():
            workspaces[workspace.id] = workspace
        print 'Populating new configuration field for %s jobs' % str(total_count)
        done_count = 0
        batch_size = 1000
        while done_count < total_count:
            percent = (float(done_count) / float(total_count)) * 100.00
            print 'Completed %s of %s jobs (%f%%)' % (done_count, total_count, percent)
            batch_end = done_count + batch_size
            for job in Job.objects.select_related('job_type').order_by('id')[done_count:batch_end]:

                # Ignore jobs that don't have their job data populated yet
                if not job.data:
                    continue

                data = JobData(job.data)
                input_file_ids = data.get_input_file_ids()
                input_files = ScaleFile.objects.filter(id__in=input_file_ids).select_related('workspace').iterator()
                input_workspaces = set()
                for input_file in input_files:
                    input_workspaces.add(input_file.workspace.name)

                configuration = ExecutionConfiguration()
                for name in input_workspaces:
                    configuration.add_job_task_workspace(name, MODE_RO)
                if not job.job_type.is_system:
                    for name in input_workspaces:
                        configuration.add_pre_task_workspace(name, MODE_RO)
                        # We add input workspaces to post task so it can perform a parse results move if requested by the
                        # job's results manifest
                        configuration.add_post_task_workspace(name, MODE_RW)
                    for workspace_id in data.get_output_workspace_ids():
                        workspace = workspaces[workspace_id]
                        if workspace.name not in input_workspaces:
                            configuration.add_post_task_workspace(workspace.name, MODE_RW)
                elif job.job_type.name == 'scale-ingest':
                    ingest_id = data.get_property_values(['Ingest ID'])['Ingest ID']
                    from ingest.models import Ingest
                    ingest = Ingest.objects.select_related('workspace').get(id=ingest_id)
                    configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)

                job.configuration = configuration.get_dict()
                job.save()
            done_count += batch_size
        print 'All %s jobs completed' % str(total_count)
Example #12
    def process_ingested_source_file(self, source_file, when):
        """Processes the given ingested source file by checking it against all ingest trigger rules and creating the
        corresponding jobs and recipes for any triggered rules. All database changes are made in an atomic transaction.

        :param source_file: The source file that was ingested
        :type source_file: :class:`source.models.SourceFile`
        :param when: When the source file was ingested
        :type when: :class:`datetime.datetime`
        """

        msg = 'Processing trigger rules for ingested source file with media type %s and data types %s'
        logger.info(msg, source_file.media_type,
                    str(list(source_file.get_data_type_tags())))

        any_rules = False
        for entry in RecipeType.objects.get_active_trigger_rules(INGEST_TYPE):
            rule = entry[0]
            thing_to_create = entry[1]
            rule_config = rule.get_configuration()
            condition = rule_config.get_condition()

            if condition.is_condition_met(source_file):
                logger.info(condition.get_triggered_message())
                any_rules = True

                event = self._create_ingest_trigger_event(
                    source_file, rule, when)
                workspace = Workspace.objects.get(
                    name=rule_config.get_workspace_name())

                if isinstance(thing_to_create, JobType):
                    job_type = thing_to_create
                    job_data = JobData({})
                    job_data.add_file_input(rule_config.get_input_data_name(),
                                            source_file.id)
                    job_type.get_job_interface().add_workspace_to_data(
                        job_data, workspace.id)
                    logger.info('Queuing new job of type %s %s', job_type.name,
                                job_type.version)
                    Queue.objects.queue_new_job(job_type, job_data, event)
                elif isinstance(thing_to_create, RecipeType):
                    recipe_type = thing_to_create
                    recipe_data = RecipeData({})
                    recipe_data.add_file_input(
                        rule_config.get_input_data_name(), source_file.id)
                    recipe_data.set_workspace_id(workspace.id)
                    logger.info('Queuing new recipe of type %s %s',
                                recipe_type.name, recipe_type.version)
                    Queue.objects.queue_new_recipe(recipe_type, recipe_data,
                                                   event)

        if not any_rules:
            logger.info('No rules triggered')
Example #13
    def create_batch(self,
                     recipe_type,
                     definition,
                     title=None,
                     description=None):
        """Creates a new batch that represents a group of recipes that should be scheduled for re-processing. This
        method also queues a new system job that will process the batch request. All database changes occur in an atomic
        transaction.

        :param recipe_type: The type of recipes that should be re-processed
        :type recipe_type: :class:`recipe.models.RecipeType`
        :param definition: The definition for running a batch
        :type definition: :class:`batch.configuration.definition.batch_definition.BatchDefinition`
        :param title: The human-readable name of the batch
        :type title: string
        :param description: An optional description of the batch
        :type description: string
        :returns: The newly created batch
        :rtype: :class:`batch.models.Batch`

        :raises :class:`batch.exceptions.BatchError`: If general batch parameters are invalid
        """

        # Attempt to get the batch job type
        try:
            job_type = JobType.objects.filter(
                name='scale-batch-creator').last()
        except JobType.DoesNotExist:
            raise BatchError('Missing required job type: scale-batch-creator')

        # Create an event to represent this request
        trigger_desc = {'user': '******'}
        event = TriggerEvent.objects.create_trigger_event(
            'USER', None, trigger_desc, timezone.now())

        batch = Batch()
        batch.title = title
        batch.description = description
        batch.recipe_type = recipe_type
        batch.definition = definition.get_dict()
        batch.event = event
        batch.save()

        # Setup the job data to process the batch
        data = JobData()
        data.add_property_input('Batch ID', str(batch.id))

        # Schedule the batch job
        job = Queue.objects.queue_new_job(job_type, data, event)
        batch.creator_job = job
        batch.save()

        return batch
Example #14
    def start_ingest_tasks(self, ingests, scan_id=None, strike_id=None):
        """Starts a batch of tasks for the given scan in an atomic transaction.

        One of scan_id or strike_id must be set.

        :param ingests: The ingest models
        :type ingests: list[:class:`ingest.models.Ingest`]
        :param scan_id: ID of Scan that generated ingest
        :type scan_id: int
        :param strike_id: ID of Strike that generated ingest
        :type strike_id: int
        """

        # Create new ingest job and mark ingest as QUEUED
        ingest_job_type = Ingest.objects.get_ingest_job_type()

        for ingest in ingests:
            logger.debug('Creating ingest task for %s', ingest.file_name)

            when = ingest.transfer_ended if ingest.transfer_ended else now()
            desc = {'file_name': ingest.file_name}

            if scan_id:
                # Use result from query to get ingest ID
                # We need to find the id of each ingest that was created.
                # Using scan_id and file_name together as a unique composite key
                ingest_id = Ingest.objects.get(scan_id=ingest.scan_id, file_name=ingest.file_name).id

                desc['scan_id'] = scan_id
                event = TriggerEvent.objects.create_trigger_event('SCAN_TRANSFER', None, desc, when)
            elif strike_id:
                ingest_id = ingest.id
                desc['strike_id'] = strike_id
                event = TriggerEvent.objects.create_trigger_event('STRIKE_TRANSFER', None, desc, when)
            else:
                raise Exception('One of scan_id or strike_id must be set')

            data = JobData()
            data.add_property_input('Ingest ID', str(ingest_id))

            exe_configuration = ExecutionConfiguration()
            if ingest.workspace:
                exe_configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)
            if ingest.new_workspace:
                exe_configuration.add_job_task_workspace(ingest.new_workspace.name, MODE_RW)
            ingest_job = Queue.objects.queue_new_job(ingest_job_type, data, event, exe_configuration)

            ingest.job = ingest_job
            ingest.status = 'QUEUED'
            ingest.save()

            logger.debug('Successfully created ingest task for %s', ingest.file_name)
Example #15
    def test_successful(self):
        """Tests calling JobData.add_property_input() successfully."""

        data = {'input_data': []}
        job_data = JobData(data)

        # Method under test; we verify it by calling validate below
        job_data.add_property_input('Param1', 'Value1')

        properties = {'Param1': True}
        # No exception is success
        warnings = JobData(data).validate_properties(properties)
        self.assertFalse(warnings)
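Note that the test passes only because add_property_input mutates the same dict that the second JobData instance is built from, so JobData appears to keep a reference to the dict passed to its constructor. Based on the dict literals used in the constructor and get_property_values tests further down this page (Examples #31 and #35), the appended entry presumably looks like this sketch (an assumption, not verified against the implementation):

    # Presumed effect of add_property_input on the underlying dict, inferred
    # from the 'input_data' literals used elsewhere on this page.
    from job.configuration.data.job_data import JobData

    data = {'input_data': []}
    job_data = JobData(data)
    job_data.add_property_input('Param1', 'Value1')
    # data is now expected to resemble:
    # {'input_data': [{'name': 'Param1', 'value': 'Value1'}]}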
Example #16
    def test_successful(self, mock_store):
        """Tests calling JobData.add_output() successfully."""

        data = {'output_data': []}
        job_data = JobData(data)

        # Method under test; we verify it by calling validate below
        job_data.add_output('File1', 1)

        files = ['File1']
        # No exception is success
        warnings = JobData(data).validate_output_files(files)
        self.assertFalse(warnings)
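By analogy with the validate_output_files tests below (Examples #23 to #26), the entry written by add_output presumably names the output and records the workspace ID; a sketch of the assumed shape:

    # Presumed effect of add_output on the underlying dict, inferred from the
    # 'output_data' literals used in the validation tests on this page.
    from job.configuration.data.job_data import JobData

    data = {'output_data': []}
    job_data = JobData(data)
    job_data.add_output('File1', 1)  # 1 is presumably a workspace ID
    # data is now expected to resemble:
    # {'output_data': [{'name': 'File1', 'workspace_id': 1}]}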
Example #17
    def test_successful(self):
        """Tests calling JobData.add_property_input() successfully."""

        data = {'input_data': []}
        job_data = JobData(data)

        # Method under test; we verify it by calling validate below
        job_data.add_property_input('Param1', 'Value1')

        properties = {'Param1': True}
        # No exception is success
        warnings = JobData(data).validate_properties(properties)
        self.assertFalse(warnings)
Example #18
    def test_successful(self, mock_store):
        """Tests calling JobData.add_output() successfully."""

        data = {'output_data': []}
        job_data = JobData(data)

        # Method under test; we verify it by calling validate below
        job_data.add_output('File1', 1)

        files = ['File1']
        # No exception is success
        warnings = JobData(data).validate_output_files(files)
        self.assertFalse(warnings)
Example #19
    def process_ingest(self, ingest, source_file_id):
        """Processes the given source file ingest by creating the appropriate jobs if the rule is triggered. All
        database changes are made in an atomic transaction.

        :param ingest: The ingest to process
        :type ingest: :class:`ingest.models.Ingest`
        :param source_file_id: The ID of the source file that was ingested
        :type source_file_id: long
        """

        # Only trigger when this ingest file has the correct media type and data types
        if self._media_type and self._media_type != ingest.media_type:
            return
        if not self._data_types.issubset(ingest.get_data_type_tags()):
            return

        if not self._media_type:
            logger.info("Ingest rule for all media types was triggered")
        else:
            logger.info("Ingest rule for media type %s was triggered", self._media_type)
        event = IngestTriggerEvent(self._model, ingest).save_to_db()

        # Create triggered jobs
        for job in self._jobs_to_create:
            job_type = self._job_type_map[(job["job_type"]["name"], job["job_type"]["version"])]
            file_input_name = job["file_input_name"]
            job_data = JobData({})
            job_data.add_file_input(file_input_name, source_file_id)

            # If workspace name has been provided, add that to the job data for each output file
            if "workspace_name" in job:
                workspace = self._workspace_map[job["workspace_name"]]
                job_type.get_job_interface().add_workspace_to_data(job_data, workspace.id)
            logger.info("Queuing new job of type %s %s", job_type.name, job_type.version)
            Queue.objects.queue_new_job(job_type, job_data.get_dict(), event)

        # Create triggered recipes
        for recipe in self._recipes_to_create:
            recipe_type = self._recipe_type_map[(recipe["recipe_type"]["name"], recipe["recipe_type"]["version"])]
            file_input_name = recipe["file_input_name"]
            recipe_data = RecipeData({})
            recipe_data.add_file_input(file_input_name, source_file_id)

            # If workspace name has been provided, add that to the recipe data for each output file
            if "workspace_name" in recipe:
                workspace = self._workspace_map[recipe["workspace_name"]]
                recipe_data.set_workspace_id(workspace.id)
            logger.info("Queuing new recipe of type %s %s", recipe_type.name, recipe_type.version)
            Queue.objects.queue_new_recipe(recipe_type, recipe_data.get_dict(), event)
Example #20
    def test_successful(self):
        """Tests calling JobData.add_file_list_input() successfully."""

        data = {'input_data': []}
        job_data = JobData(data)

        # Method under test; we verify it by calling validate below
        job_data.add_file_list_input('File1', [self.file_1.id])

        file_desc_1 = ScaleFileDescription()
        file_desc_1.add_allowed_media_type('application/json')
        files = {'File1': (True, True, file_desc_1)}
        # No exception is success
        warnings = JobData(data).validate_input_files(files)
        self.assertFalse(warnings)
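Single-file and multi-file parameters use different keys in 'input_data': a single 'file_id' versus a 'file_ids' list, as the validation and get_input_file_ids tests on this page show (Examples #28, #29, #36). A small sketch of both assumed shapes, with hypothetical file IDs:

    # Presumed 'input_data' entry shapes for file parameters, following the
    # dict literals used elsewhere on this page. File IDs are hypothetical.
    single_file_entry = {'name': 'File1', 'file_id': 42}
    multi_file_entry = {'name': 'Files1', 'file_ids': [5, 7, 23]}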
Example #21
    def test_successful(self):
        """Tests calling JobData.add_file_list_input() successfully."""

        data = {'input_data': []}
        job_data = JobData(data)

        # Method under test; we verify it by calling validate below
        job_data.add_file_list_input('File1', [self.file_1.id])

        file_desc_1 = ScaleFileDescription()
        file_desc_1.add_allowed_media_type('application/json')
        files = {'File1': (True, True, file_desc_1)}
        # No exception is success
        warnings = JobData(data).validate_input_files(files)
        self.assertFalse(warnings)
Example #22
    def test_json(self):
        """Tests coverting a CancelJobsBulk message to and from JSON"""

        sys_err = error_test_utils.create_error(category='SYSTEM')

        data = JobData()
        batch = batch_test_utils.create_batch()
        recipe = recipe_test_utils.create_recipe()
        job_type = job_test_utils.create_job_type()
        job_1 = job_test_utils.create_job(job_type=job_type,
                                          num_exes=3,
                                          status='FAILED',
                                          error=sys_err,
                                          input=data.get_dict())
        job_1.batch_id = batch.id
        job_1.recipe_id = recipe.id
        job_1.save()
        job_2 = job_test_utils.create_job(job_type=job_type,
                                          num_exes=3,
                                          status='FAILED',
                                          error=sys_err,
                                          input=data.get_dict())

        # Create message
        message = CancelJobsBulk()
        message.started = job_1.last_modified - timedelta(seconds=1)
        message.ended = job_1.last_modified + timedelta(seconds=1)
        message.error_categories = ['SYSTEM']
        message.error_ids = [sys_err.id]
        message.job_ids = [job_1.id]
        message.job_type_ids = [job_type.id]
        message.status = 'FAILED'
        message.job_type_names = [job_type.name]
        message.batch_ids = [batch.id]
        message.recipe_ids = [recipe.id]
        message.is_superseded = False

        # Convert message to JSON and back, and then execute
        message_json_dict = message.to_json()
        new_message = CancelJobsBulk.from_json(message_json_dict)
        result = new_message.execute()

        self.assertTrue(result)
        # Should be one cancel message for job 1
        self.assertEqual(len(new_message.new_messages), 1)
        message = new_message.new_messages[0]
        self.assertEqual(message.type, 'cancel_jobs')
        self.assertListEqual(message._job_ids, [job_1.id])
Example #23
    def test_workspace_not_active(self, mock_store):
        """Tests calling JobData.validate_output_files() with a workspace that is not active"""

        data = {'output_data': [{'name': 'File1', 'workspace_id': 3}]}
        files = ['File1']
        self.assertRaises(InvalidData,
                          JobData(data).validate_output_files, files)
Example #24
    def test_workspace_id_not_integer(self, mock_store):
        """Tests calling JobData.validate_output_files() when an output has a non-integral value for workspace_id"""

        data = {'output_data': [{'name': 'File1', 'workspace_id': 'foo'}]}
        files = ['File1']
        self.assertRaises(InvalidData,
                          JobData(data).validate_output_files, files)
Example #25
    def test_missing_workspace_id(self, mock_store):
        """Tests calling JobData.validate_output_files() when an output is missing the workspace_id field"""

        data = {'output_data': [{'name': 'File1'}]}
        files = ['File1']
        self.assertRaises(InvalidData,
                          JobData(data).validate_output_files, files)
Example #26
    def test_missing_output(self, mock_store):
        """Tests calling JobData.validate_output_files() when an output is missing"""

        data = {'output_data': []}
        files = ['File1']
        self.assertRaises(InvalidData,
                          JobData(data).validate_output_files, files)
Example #27
    def test_bad_file_id(self):
        """Tests calling JobData.validate_input_files() with a file that has an invalid ID"""

        data = {'input_data': [{'name': 'File1', 'file_id': 9999999999}]}
        files = {'File1': (True, False, ScaleFileDescription())}
        self.assertRaises(InvalidData,
                          JobData(data).validate_input_files, files)
Example #28
    def test_single_non_integral(self):
        """Tests calling JobData.validate_input_files() with a single file param and non-integral file_id field"""

        data = {'input_data': [{'name': 'File1', 'file_id': 'STRING'}]}
        files = {'File1': (True, False, ScaleFileDescription())}
        self.assertRaises(InvalidData,
                          JobData(data).validate_input_files, files)
Example #29
    def test_multiple_non_list(self):
        """Tests calling JobData.validate_input_files() with a multiple file param with a non-list for file_ids field"""

        data = {'input_data': [{'name': 'File1', 'file_ids': 'STRING'}]}
        files = {'File1': (True, True, ScaleFileDescription())}
        self.assertRaises(InvalidData,
                          JobData(data).validate_input_files, files)
Example #30
    def test_missing_required(self):
        """Tests calling JobData.validate_input_files() when a file is required, but missing"""

        data = {'input_data': []}
        files = {'File1': (True, True, ScaleFileDescription())}
        self.assertRaises(InvalidData,
                          JobData(data).validate_input_files, files)
Example #31
    def test_init_successful_one_property(self):
        """Tests calling JobData constructor successfully with a single property input."""

        data = {'input_data': [{'name': 'My Name', 'value': '1'}]}

        # No exception is success
        JobData(data)
Example #32
    def test_missing_required(self):
        """Tests calling JobData.validate_properties() when a property is required, but missing"""

        data = {'input_data': []}
        properties = {'Param1': True}
        self.assertRaises(InvalidData,
                          JobData(data).validate_properties, properties)
Example #33
    def test_missing_value(self):
        """Tests calling JobData.validate_properties() when a property is missing a value"""

        data = {'input_data': [{'name': 'Param1'}]}
        properties = {'Param1': False}
        self.assertRaises(InvalidData,
                          JobData(data).validate_properties, properties)
Example #34
    def test_value_not_string(self):
        """Tests calling JobData.validate_properties() when a property has a non-string value"""

        data = {'input_data': [{'name': 'Param1', 'value': 123}]}
        properties = {'Param1': False}
        self.assertRaises(InvalidData,
                          JobData(data).validate_properties, properties)
Example #35
    def test_successful(self):
        """Tests calling JobData.get_property_values() successfully"""

        data = {
            'input_data': [{
                'name': 'Param1',
                'value': 'Value1'
            }, {
                'name': 'Param2',
                'file_id': 1
            }, {
                'name': 'Param3',
                'value': 'Value3'
            }, {
                'name': 'Param5',
                'value': 'Value5'
            }]
        }
        property_names = ['Param1', 'Param3', 'Param4']

        property_values = JobData(data).get_property_values(property_names)

        self.assertDictEqual(property_values, {
            'Param1': 'Value1',
            'Param3': 'Value3'
        })
Example #36
    def test_successful(self):
        """Tests calling JobData.get_input_file_ids() successfully"""

        data = {
            'input_data': [{
                'name': 'Param1',
                'value': 'Value1'
            }, {
                'name': 'Param2',
                'file_id': 1
            }, {
                'name': 'Param3',
                'file_ids': [5, 7, 23]
            }, {
                'name': 'Param4',
                'file_id': 1
            }, {
                'name': 'Param5',
                'value': 'Value5'
            }]
        }

        file_ids = JobData(data).get_input_file_ids()

        self.assertSetEqual(set(file_ids), set([1, 5, 7, 23]))
Example #37
    def test_single_missing_file_id(self):
        """Tests calling JobData.validate_input_files() with a single file param missing the file_id field"""

        data = {'input_data': [{'name': 'File1'}]}
        files = {'File1': (True, False, ScaleFileDescription())}
        self.assertRaises(InvalidData,
                          JobData(data).validate_input_files, files)
Example #38
    def test_files_in_command(self, mock_retrieve_call, mock_os_mkdir, mock_isdir):
        def new_retrieve(arg1):
            return {
                'files1_out': ['/test/file1/foo.txt', '/test/file1/bar.txt'],
            }

        mock_retrieve_call.side_effect = new_retrieve
        job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
        job_interface_dict['command_arguments'] = '${files1}'
        job_interface_dict['input_data'] = [{
            'name': 'files1',
            'type': 'files',
            'required': True,
        }]
        job_data_dict['input_data'].append({
            'name': 'files1',
            'file_ids': [1, 2, 3],
        })
        job_data_dict['output_data'].append({
            'name': 'files1_out',
            'workspace_id': self.workspace.id,
        })

        job_interface = JobInterface(job_interface_dict)
        job_data = JobData(job_data_dict)
        job_environment = job_environment_dict
        job_exe_id = 1

        job_interface.perform_pre_steps(job_data, job_environment)
        job_command_arguments = job_interface.fully_populate_command_argument(job_data, job_environment, job_exe_id)
        expected_command_arguments = os.path.join(SCALE_JOB_EXE_INPUT_PATH, 'files1')
        self.assertEqual(job_command_arguments, expected_command_arguments,
                         'expected a different command from pre_steps')
Example #39
    def test_file_in_command(self, mock_retrieve_call, mock_os_mkdir, mock_get_one_file, mock_isdir):
        job_exe_id = 1

        def new_retrieve(arg1):
            return {
                'file1_out': [input_file_path],
            }

        input_file_path = os.path.join(SCALE_JOB_EXE_INPUT_PATH, 'file1', 'foo.txt')
        mock_retrieve_call.side_effect = new_retrieve
        mock_get_one_file.side_effect = lambda (arg1): input_file_path
        job_interface_dict, job_data_dict, job_environment_dict = self._get_simple_interface_data_env()
        job_interface_dict['command_arguments'] = '${file1}'
        job_interface_dict['input_data'] = [{
            'name': 'file1',
            'type': 'file',
            'required': True,
        }]
        job_data_dict['input_data'].append({
            'name': 'file1',
            'file_id': self.file.id,
        })
        job_data_dict['output_data'].append({
            'name': 'file1_out',
            'workspace_id': self.workspace.id,
        })

        job_interface = JobInterface(job_interface_dict)
        job_data = JobData(job_data_dict)
        job_environment = job_environment_dict

        job_interface.perform_pre_steps(job_data, job_environment)
        job_command_arguments = job_interface.fully_populate_command_argument(job_data, job_environment, job_exe_id)
        self.assertEqual(job_command_arguments, input_file_path, 'expected a different command from pre_steps')
Example #40
    def create_batch(self, recipe_type, definition, title=None, description=None):
        """Creates a new batch that represents a group of recipes that should be scheduled for re-processing. This
        method also queues a new system job that will process the batch request. All database changes occur in an atomic
        transaction.

        :param recipe_type: The type of recipes that should be re-processed
        :type recipe_type: :class:`recipe.models.RecipeType`
        :param definition: The definition for running a batch
        :type definition: :class:`batch.configuration.definition.batch_definition.BatchDefinition`
        :param title: The human-readable name of the batch
        :type title: string
        :param description: An optional description of the batch
        :type description: string
        :returns: The newly created batch
        :rtype: :class:`batch.models.Batch`

        :raises :class:`batch.exceptions.BatchError`: If general batch parameters are invalid
        """

        # Attempt to get the batch job type
        try:
            job_type = JobType.objects.filter(name='scale-batch-creator').last()
        except JobType.DoesNotExist:
            raise BatchError('Missing required job type: scale-batch-creator')

        # Create an event to represent this request
        trigger_desc = {'user': '******'}
        event = TriggerEvent.objects.create_trigger_event('USER', None, trigger_desc, timezone.now())

        batch = Batch()
        batch.title = title
        batch.description = description
        batch.recipe_type = recipe_type
        batch.definition = definition.get_dict()
        batch.event = event
        batch.save()

        # Setup the job data to process the batch
        data = JobData()
        data.add_property_input('Batch ID', str(batch.id))

        # Schedule the batch job
        job = Queue.objects.queue_new_job(job_type, data, event)
        batch.creator_job = job
        batch.save()

        return batch
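Example #41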
    def process_ingested_source_file(self, source_file, when):
        """Processes the given ingested source file by checking it against all ingest trigger rules and creating the
        corresponding jobs and recipes for any triggered rules. All database changes are made in an atomic transaction.

        :param source_file: The source file that was ingested
        :type source_file: :class:`source.models.SourceFile`
        :param when: When the source file was ingested
        :type when: :class:`datetime.datetime`
        """

        msg = 'Processing trigger rules for ingested source file with media type %s and data types %s'
        logger.info(msg, source_file.media_type, str(list(source_file.get_data_type_tags())))

        any_rules = False
        for entry in RecipeType.objects.get_active_trigger_rules(INGEST_TYPE):
            rule = entry[0]
            thing_to_create = entry[1]
            rule_config = rule.get_configuration()
            condition = rule_config.get_condition()

            if condition.is_condition_met(source_file):
                logger.info(condition.get_triggered_message())
                any_rules = True

                event = self._create_ingest_trigger_event(source_file, rule, when)
                workspace = Workspace.objects.get(name=rule_config.get_workspace_name())

                if isinstance(thing_to_create, JobType):
                    job_type = thing_to_create
                    job_data = JobData({})
                    job_data.add_file_input(rule_config.get_input_data_name(), source_file.id)
                    job_type.get_job_interface().add_workspace_to_data(job_data, workspace.id)
                    logger.info('Queuing new job of type %s %s', job_type.name, job_type.version)
                    Queue.objects.queue_new_job(job_type, job_data, event)
                elif isinstance(thing_to_create, RecipeType):
                    recipe_type = thing_to_create
                    recipe_data = RecipeData({})
                    recipe_data.add_file_input(rule_config.get_input_data_name(), source_file.id)
                    recipe_data.set_workspace_id(workspace.id)
                    logger.info('Queuing new recipe of type %s %s', recipe_type.name, recipe_type.version)
                    Queue.objects.queue_new_recipe(recipe_type, recipe_data, event)

        if not any_rules:
            logger.info('No rules triggered')
Example #42
    def create_strike(self, name, title, description, configuration):
        """Creates a new Strike process with the given configuration and returns the new Strike model. The Strike model
        will be saved in the database and the job to run the Strike process will be placed on the queue. All changes to
        the database will occur in an atomic transaction.

        :param name: The identifying name of this Strike process
        :type name: string
        :param title: The human-readable name of this Strike process
        :type title: string
        :param description: A description of this Strike process
        :type description: string
        :param configuration: The Strike configuration
        :type configuration: dict
        :returns: The new Strike process
        :rtype: :class:`ingest.models.Strike`

        :raises :class:`ingest.strike.configuration.exceptions.InvalidStrikeConfiguration`: If the configuration is
            invalid.
        """

        # Validate the configuration, no exception is success
        config = StrikeConfiguration(configuration)
        config.validate()

        strike = Strike()
        strike.name = name
        strike.title = title
        strike.description = description
        strike.configuration = config.get_dict()
        strike.save()

        strike_type = self.get_strike_job_type()
        job_data = JobData()
        job_data.add_property_input('Strike ID', unicode(strike.id))
        event_description = {'strike_id': strike.id}
        event = TriggerEvent.objects.create_trigger_event('STRIKE_CREATED', None, event_description, now())
        strike.job = Queue.objects.queue_new_job(strike_type, job_data, event)
        strike.save()

        return strike