Example #1
0
    def _start_ingest_task(self, ingest):
        """Queues a new ingest job for the given ingest model in an atomic transaction

        :param ingest: The ingest model
        :type ingest: :class:`ingest.models.Ingest`
        """

        logger.info('Creating ingest task for %s', ingest.file_name)

        # Look up the system ingest job type and build job data carrying the ingest ID
        job_type = Ingest.objects.get_ingest_job_type()
        job_data = JobData()
        job_data.add_property_input('Ingest ID', str(ingest.id))

        # Record the trigger event; fall back to the current time when the
        # transfer end time is not set on the ingest
        description = {'strike_id': self.strike_id, 'file_name': ingest.file_name}
        occurred = ingest.transfer_ended or now()
        trigger_event = TriggerEvent.objects.create_trigger_event('STRIKE_TRANSFER', None, description, occurred)

        # Grant the job task read/write access to any workspace attached to the ingest
        configuration = JobConfiguration()
        for workspace in (ingest.workspace, ingest.new_workspace):
            if workspace:
                configuration.add_job_task_workspace(workspace.name, MODE_RW)

        queued_job = Queue.objects.queue_new_job(job_type, job_data, trigger_event, configuration)

        # Link the queued job back to the ingest and mark it QUEUED
        ingest.job = queued_job
        ingest.status = 'QUEUED'
        ingest.save()

        logger.info('Successfully created ingest task for %s', ingest.file_name)
Example #2
0
    def queue_new_job(self, job_type, data, event, configuration=None):
        """Creates a new job of the given type and immediately places it on the queue. The new job,
        job_exe, and queue models are saved in the database in an atomic transaction.

        :param job_type: The type of the new job to create and queue
        :type job_type: :class:`job.models.JobType`
        :param data: The job data to run on
        :type data: :class:`job.configuration.data.job_data.JobData`
        :param event: The event that triggered the creation of this job
        :type event: :class:`trigger.models.TriggerEvent`
        :param configuration: The optional initial job configuration
        :type configuration: :class:`job.configuration.configuration.job_configuration.JobConfiguration`
        :returns: The new queued job
        :rtype: :class:`job.models.Job`

        :raises job.configuration.data.exceptions.InvalidData: If the job data is invalid
        """

        new_job = Job.objects.create_job(job_type, event)

        # Fall back to an empty configuration when none was provided
        effective_config = configuration if configuration else JobConfiguration()
        new_job.configuration = effective_config.get_dict()
        new_job.save()

        # The job was created inside this transaction and is not yet visible to any
        # other process, so no lock is needed before populating its data
        Job.objects.populate_job_data(new_job, data)
        self._queue_jobs([new_job])

        return new_job
Example #3
0
    def queue_new_job(self, job_type, data, event, configuration=None):
        """Creates and queues a new job for the given type and data. The job is placed on the
        queue immediately; the job, job_exe, and queue models are all persisted in one atomic
        transaction.

        :param job_type: The type of the new job to create and queue
        :type job_type: :class:`job.models.JobType`
        :param data: The job data to run on
        :type data: :class:`job.configuration.data.job_data.JobData`
        :param event: The event that triggered the creation of this job
        :type event: :class:`trigger.models.TriggerEvent`
        :param configuration: The optional initial job configuration
        :type configuration: :class:`job.configuration.configuration.job_configuration.JobConfiguration`
        :returns: The new queued job
        :rtype: :class:`job.models.Job`

        :raises job.configuration.data.exceptions.InvalidData: If the job data is invalid
        """

        job = Job.objects.create_job(job_type, event)
        # Use the supplied configuration, or a fresh empty one when absent
        job.configuration = (configuration or JobConfiguration()).get_dict()
        job.save()

        # This job only exists within the current transaction, so populating its
        # data requires no database lock
        Job.objects.populate_job_data(job, data)
        self._queue_jobs([job])

        return job
    def test_populate_default_job_settings(self):
        """Tests the addition of default settings to the configuration.

        Builds a job type configuration containing two default settings, populates a fresh
        :class:`JobConfiguration` from it, and verifies the job task settings round-trip.
        """

        job_config = JobConfiguration()

        config_dict = {
            'version': '1.0',
            'default_settings': {
                'setting_name': 'some_val',
                'setting2': 'other_val'
            }
        }

        # The mocked job execution hands back a job type configuration built from config_dict
        job_exe = MagicMock()
        job_exe.get_job_type_configuration.return_value = JobTypeConfiguration(config_dict)

        job_config.populate_default_job_settings(job_exe)

        populated_config = job_config.get_dict()
        populated_settings = populated_config['job_task']['settings']

        # Wrap values() in list() so this also works on Python 3, where dict views are not
        # subscriptable. NOTE(review): this still assumes each setting dict yields its values
        # in (name, value) order, which relies on dict insertion order — confirm against the
        # shape of the setting dicts emitted by get_dict().
        populated_setting_values = [list(x.values()) for x in populated_settings]
        results_dict = {x[0]: x[1] for x in populated_setting_values}

        # assertEqual gives a useful diff on failure, unlike assertTrue(a == b)
        self.assertEqual(results_dict, config_dict['default_settings'])
    def test_init_validation(self):
        """Tests the validation done in __init__"""

        # A bare configuration is the minimal acceptable case and must not raise
        JobConfiguration()

        def duplicate_workspaces():
            # Two workspace entries sharing the same name, which validation must reject
            return [{'name': 'name1', 'mode': 'ro'}, {'name': 'name1', 'mode': 'ro'}]

        # Duplicate workspace name in pre-task
        config = {'pre_task': {'workspaces': duplicate_workspaces()},
                  'job_task': {'workspaces': []}}
        self.assertRaises(InvalidJobConfiguration, JobConfiguration, config)

        # Duplicate workspace name in job-task
        config = {'job_task': {'workspaces': duplicate_workspaces()}}
        self.assertRaises(InvalidJobConfiguration, JobConfiguration, config)

        # Duplicate workspace name in post-task
        config = {'post_task': {'workspaces': duplicate_workspaces()},
                  'job_task': {'workspaces': []}}
        self.assertRaises(InvalidJobConfiguration, JobConfiguration, config)
Example #6
0
def create_job_exe(job_type=None,
                   job=None,
                   status='RUNNING',
                   configuration=None,
                   error=None,
                   command_arguments='test_arg',
                   timeout=None,
                   node=None,
                   created=None,
                   queued=None,
                   started=None,
                   pre_started=None,
                   pre_completed=None,
                   job_started=None,
                   job_completed=None,
                   post_started=None,
                   post_completed=None,
                   ended=None,
                   last_modified=None):
    """Creates a job execution model for unit testing

    Any argument left as None (among job, configuration, timeout, node, and the
    created/queued/started/last_modified timestamps) is filled with a sensible default.

    :returns: The job execution model
    :rtype: :class:`job.models.JobExecution`
    """

    now_stamp = timezone.now()

    # Fill in defaults for anything the caller did not supply
    job = job or create_job(job_type=job_type)
    configuration = configuration or JobConfiguration().get_dict()
    timeout = timeout or job.timeout
    node = node or node_utils.create_node()

    # All unspecified lifecycle timestamps default to the same moment
    created = created or now_stamp
    queued = queued or now_stamp
    started = started or now_stamp
    last_modified = last_modified or now_stamp

    return JobExecution.objects.create(job=job,
                                       status=status,
                                       error=error,
                                       configuration=configuration,
                                       command_arguments=command_arguments,
                                       timeout=timeout,
                                       node=node,
                                       created=created,
                                       queued=queued,
                                       started=started,
                                       pre_started=pre_started,
                                       pre_completed=pre_completed,
                                       job_started=job_started,
                                       job_completed=job_completed,
                                       post_started=post_started,
                                       post_completed=post_completed,
                                       ended=ended,
                                       last_modified=last_modified)
 def test_successful(self, mock_get_dict):
     """Tests calling JobConfiguration.update() successfully."""
     # mock_get_dict is the patched get_dict (patch decorator not visible in this
     # excerpt); make it return this test's stored configuration dict
     mock_get_dict.return_value = self.job_configuration_dict
     # convert_configuration presumably migrates the dict to the current schema
     # version — TODO confirm against its implementation
     job_configuration = JobConfiguration.convert_configuration(self.job_configuration_dict)
     # The converted dict reports version '1.1' and carries an empty settings
     # list/dict under the job task
     self.assertEqual(job_configuration['version'], '1.1')
     self.assertFalse(job_configuration['job_task']['settings'])
    def populate_job_configuration(apps, schema_editor):
        """Data-migration step (Python 2): builds and saves a configuration dict for every
        existing job that already has job data, deriving workspace access from the job's
        input/output files. Processes jobs in batches of 1000, printing progress as it goes.
        """
        from job.configuration.configuration.job_configuration import JobConfiguration, MODE_RO, MODE_RW
        from job.configuration.data.job_data import JobData
        # Go through all of the job models that have job data and populate their configuration
        # Historical models from the migration state, not the live model classes
        Job = apps.get_model('job', 'Job')
        ScaleFile = apps.get_model('storage', 'ScaleFile')
        Workspace = apps.get_model('storage', 'Workspace')
        total_count = Job.objects.all().count()
        # Cache all workspaces by ID up front to avoid per-job queries below
        workspaces = {}
        for workspace in Workspace.objects.all().iterator():
            workspaces[workspace.id] = workspace
        print 'Populating new configuration field for %s jobs' % str(
            total_count)
        done_count = 0
        batch_size = 1000
        while done_count < total_count:
            percent = (float(done_count) / float(total_count)) * 100.00
            print 'Completed %s of %s jobs (%f%%)' % (done_count, total_count,
                                                      percent)
            batch_end = done_count + batch_size
            # Stable ordering by id makes the offset/limit batching deterministic
            for job in Job.objects.select_related('job_type').order_by(
                    'id')[done_count:batch_end]:

                # Ignore jobs that don't have their job data populated yet
                if not job.data:
                    continue

                # Collect the distinct workspace names holding this job's input files
                data = JobData(job.data)
                input_file_ids = data.get_input_file_ids()
                input_files = ScaleFile.objects.filter(
                    id__in=input_file_ids).select_related(
                        'workspace').iterator()
                input_workspaces = set()
                for input_file in input_files:
                    input_workspaces.add(input_file.workspace.name)

                # Every job task gets read-only access to its input workspaces
                configuration = JobConfiguration()
                for name in input_workspaces:
                    configuration.add_job_task_workspace(name, MODE_RO)
                if not job.job_type.is_system:
                    for name in input_workspaces:
                        configuration.add_pre_task_workspace(name, MODE_RO)
                        # We add input workspaces to post task so it can perform a parse results move if requested by the
                        # job's results manifest
                        configuration.add_post_task_workspace(name, MODE_RW)
                    # Output workspaces also need post-task write access (skip any
                    # already added as input workspaces above)
                    for workspace_id in data.get_output_workspace_ids():
                        workspace = workspaces[workspace_id]
                        if workspace.name not in input_workspaces:
                            configuration.add_post_task_workspace(
                                workspace.name, MODE_RW)
                elif job.job_type.name == 'scale-ingest':
                    # System ingest jobs write into the workspace of their ingest model
                    ingest_id = data.get_property_values(['Ingest ID'
                                                          ])['Ingest ID']
                    from ingest.models import Ingest
                    ingest = Ingest.objects.select_related('workspace').get(
                        id=ingest_id)
                    configuration.add_job_task_workspace(
                        ingest.workspace.name, MODE_RW)

                job.configuration = configuration.get_dict()
                job.save()
            done_count += batch_size
        print 'All %s jobs completed' % str(total_count)
    def populate_job_configuration(apps, schema_editor):
        """Data-migration step (Python 2): builds and saves a configuration dict for every
        existing job that already has job data, deriving workspace access from the job's
        input/output files. Processes jobs in batches of 1000, printing progress as it goes.
        """
        from job.configuration.configuration.job_configuration import JobConfiguration, MODE_RO, MODE_RW
        from job.configuration.data.job_data import JobData

        # Go through all of the job models that have job data and populate their configuration
        # Historical models from the migration state, not the live model classes
        Job = apps.get_model("job", "Job")
        ScaleFile = apps.get_model("storage", "ScaleFile")
        Workspace = apps.get_model("storage", "Workspace")
        total_count = Job.objects.all().count()
        # Cache all workspaces by ID up front to avoid per-job queries below
        workspaces = {}
        for workspace in Workspace.objects.all().iterator():
            workspaces[workspace.id] = workspace
        print "Populating new configuration field for %s jobs" % str(total_count)
        done_count = 0
        batch_size = 1000
        while done_count < total_count:
            percent = (float(done_count) / float(total_count)) * 100.00
            print "Completed %s of %s jobs (%f%%)" % (done_count, total_count, percent)
            batch_end = done_count + batch_size
            # Stable ordering by id makes the offset/limit batching deterministic
            for job in Job.objects.select_related("job_type").order_by("id")[done_count:batch_end]:

                # Ignore jobs that don't have their job data populated yet
                if not job.data:
                    continue

                # Collect the distinct workspace names holding this job's input files
                data = JobData(job.data)
                input_file_ids = data.get_input_file_ids()
                input_files = ScaleFile.objects.filter(id__in=input_file_ids).select_related("workspace").iterator()
                input_workspaces = set()
                for input_file in input_files:
                    input_workspaces.add(input_file.workspace.name)

                # Every job task gets read-only access to its input workspaces
                configuration = JobConfiguration()
                for name in input_workspaces:
                    configuration.add_job_task_workspace(name, MODE_RO)
                if not job.job_type.is_system:
                    for name in input_workspaces:
                        configuration.add_pre_task_workspace(name, MODE_RO)
                        # We add input workspaces to post task so it can perform a parse results move if requested by the
                        # job's results manifest
                        configuration.add_post_task_workspace(name, MODE_RW)
                    # Output workspaces also need post-task write access (skip any
                    # already added as input workspaces above)
                    for workspace_id in data.get_output_workspace_ids():
                        workspace = workspaces[workspace_id]
                        if workspace.name not in input_workspaces:
                            configuration.add_post_task_workspace(workspace.name, MODE_RW)
                elif job.job_type.name == "scale-ingest":
                    # System ingest jobs write into the workspace of their ingest model
                    ingest_id = data.get_property_values(["Ingest ID"])["Ingest ID"]
                    from ingest.models import Ingest

                    ingest = Ingest.objects.select_related("workspace").get(id=ingest_id)
                    configuration.add_job_task_workspace(ingest.workspace.name, MODE_RW)

                job.configuration = configuration.get_dict()
                job.save()
            done_count += batch_size
        print "All %s jobs completed" % str(total_count)