Code Example #1
    def setUp(self):
        demo_user = m.User.objects.get(name='demo')
        workflow = mommy.make('Workflow', user=demo_user)
        self.job = mommy.make('Job', workflow=workflow, order=0)
        self.td = mommy.make('ToolDesc', name='my-tool')
        self.tool = mommy.make('Tool', desc=self.td, path='tool.sh')
        combined_with_equals = ParameterSwitchUse.objects.get(
            display_text='combined with equals')
        value_only = ParameterSwitchUse.objects.get(display_text='valueOnly')
        mommy.make('ToolParameter',
                   tool=self.td,
                   switch="-arg1",
                   switch_use=combined_with_equals,
                   rank=2)
        mommy.make('ToolParameter',
                   tool=self.td,
                   switch="-arg2",
                   switch_use=value_only,
                   rank=1)
        mommy.make('ToolParameter',
                   tool=self.td,
                   switch="-arg3",
                   switch_use=value_only,
                   file_assignment='batch')

        self.template = CommandTemplate()
        self.job_1_dict = {
            "jobId": 1,
            "toolName": "my-tool",
            "toolId": self.tool.id,
            "parameterList": {
                "parameter": []
            }
        }
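
The two ParameterSwitchUse rows fetched above control how a switch and its value are joined when the command line is rendered (the tests in Code Example #5 pin this down). A minimal, self-contained sketch of the two styles; the helper name and signature are illustrative assumptions, not Yabi API, and shell quoting is ignored here:

def join_switch(switch, value, switch_use):
    # Illustrative only: mimics the two switch_use styles used in the fixtures.
    if switch_use == 'combined with equals':
        return '%s=%s' % (switch, value)   # e.g. "-arg1=value"
    if switch_use == 'valueOnly':
        return value                       # the switch itself is dropped
    raise ValueError('unhandled switch_use: %s' % switch_use)

assert join_switch('-arg1', 'value', 'combined with equals') == '-arg1=value'
assert join_switch('-arg2', 'value', 'valueOnly') == 'value'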
Code Example #2
File: enginemodels.py  Project: viromehunter/yabi
    def __init__(self, *args, **kwargs):
        ret = Job.__init__(self, *args, **kwargs)
        if self.command_template:
            try:
                self.template = CommandTemplate()
                self.template.deserialise(self.command_template)
            except ValueError:
                logger.warning(
                    "Unable to deserialise command_template on engine job id: %s"
                    % self.id)

        else:
            self.template = None
        return ret
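
The guard above tolerates a corrupt serialised template rather than failing the whole job load. A minimal, self-contained sketch of the same pattern, using json as a stand-in for CommandTemplate (de)serialisation:

import json
import logging

logger = logging.getLogger(__name__)

def load_template(blob, job_id=None):
    # Stand-in for EngineJob.__init__: return None on a missing or corrupt blob.
    if not blob:
        return None
    try:
        return json.loads(blob)  # CommandTemplate.deserialise() in the real code
    except ValueError:
        logger.warning(
            "Unable to deserialise command_template on engine job id: %s" % job_id)
        return None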
Code Example #3
File: enginemodels.py  Project: viromehunter/yabi
    def add_job(self, job_dict):
        assert job_dict
        assert job_dict["toolName"]
        logger.debug(job_dict["toolName"])

        template = CommandTemplate()
        template.setup(self, job_dict)
        template.parse_parameter_description()

        self.job_dict = job_dict
        if "toolId" not in job_dict:
            raise InvalidRequestError("Submitted job %s lacks toolId" %
                                      job_dict["toolName"])
        self.tool = Tool.objects.get(id=job_dict["toolId"])
        if not self.tool.enabled:
            raise InvalidRequestError(
                "Can't process workflow with disabled tool '%s'" %
                self.tool.name)
        if not self.tool.does_user_have_access_to(self.user):
            raise InvalidRequestError(
                "Can't process workflow with inaccessible tool '%s'" %
                self.tool.name)

        # let's work out the highest copy level supported by this tool and store it in the job. This takes no account of the backends' capabilities;
        # that will be resolved later when the stagein is created during the walk
        self.preferred_stagein_method = 'link' if self.tool.link_supported else 'lcopy' if self.tool.lcopy_supported else 'copy'

        self.preferred_stageout_method = 'lcopy' if self.tool.lcopy_supported else 'copy'  # stageouts should never be linked. Only local copy or remote copy

        # cache job for later reference
        self.command_template = template.serialise()
        self.command = str(template)  # text description of command

        self.status = const.STATUS_PENDING
        self.stageout = "%s%s/" % (
            self.workflow.stageout, "%d - %s" %
            (self.order + 1, self.tool.get_display_name()))
        self.exec_backend = self.get_backend_uri(self.exec_credential)
        self.fs_backend = self.get_backend_uri(self.fs_credential)
        self.cpus = self.tool.cpus
        self.walltime = self.tool.walltime
        self.module = self.tool.module
        self.queue = self.tool.queue
        self.max_memory = self.tool.max_memory
        self.job_type = self.tool.job_type

        self.save()
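
The job_dict this method validates has the shape built in the test fixture of Code Example #1; a minimal instance, with a placeholder toolId (it must be a real Tool primary key):

job_dict = {
    "jobId": 1,
    "toolName": "my-tool",
    "toolId": 42,  # placeholder; add_job() looks this up via Tool.objects.get()
    "parameterList": {"parameter": []},
}
# Typical call, assuming `job` is an EngineJob already attached to a workflow:
#     job.add_job(job_dict)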
Code Example #4
File: enginemodels.py  Project: viromehunter/yabi
class EngineJob(Job):
    class Meta:
        proxy = True

    def __init__(self, *args, **kwargs):
        ret = Job.__init__(self, *args, **kwargs)
        if self.command_template:
            try:
                self.template = CommandTemplate()
                self.template.deserialise(self.command_template)
            except ValueError:
                logger.warning(
                    "Unable to deserialise command_template on engine job id: %s"
                    % self.id)

        else:
            self.template = None
        return ret

    @property
    def extensions(self):
        '''Reconstitute the input filetype extension list so each create_task can use it'''
        extensions = []
        if self.other_files:
            extensions = self.other_files
        return extensions

    def _get_be_cred(self, backend, be_type):
        if backend.is_nullbackend:
            return None
        full_term = Q(credential__user=self.workflow.user) & Q(backend=backend)

        try:
            rval = BackendCredential.objects.get(full_term)
            return rval
        except (ObjectDoesNotExist, MultipleObjectsReturned):
            logger.critical(
                'Invalid %s backend credentials for user: %s and backend: %s' %
                (be_type, self.workflow.user, self.tool.backend))
            ebcs = BackendCredential.objects.filter(full_term)
            logger.debug("EBCS returned: %s" % ebcs)
            for bc in ebcs:
                logger.debug("%s: Backend: %s Credential: %s" %
                             (bc, bc.credential, bc.backend))
            raise

    @property
    def exec_credential(self):
        return self._get_be_cred(self.tool.backend, 'execution')

    @property
    def fs_credential(self):
        return self._get_be_cred(self.tool.fs_backend, 'FS')

    def update_dependencies(self):
        self.template.update_dependencies(
            self.workflow, ignored_patterns=DEPENDENCIES_EXCLUDED_PATTERNS)
        return self.template.dependencies

    def has_incomplete_dependencies(self):
        """Check each of the dependencies (previous jobs that must be completed) in the jobs command params.
           The only dependency we have are yabi:// style references in batch_files
        """
        logger.info('Check dependencies for jobid: %s...' % self.id)
        return self.update_dependencies() != 0

    def make_tasks_ready(self):
        for task in EngineTask.objects.filter(job=self):
            # status is a property not an individual model field
            task.status = const.STATUS_READY
            task.save()

    def get_backend_uri(self, credential):
        if credential is None:
            return 'null://%s@localhost/' % self.workflow.user.name
        return credential.homedir_uri

    def add_job(self, job_dict):
        assert job_dict
        assert job_dict["toolName"]
        logger.debug(job_dict["toolName"])

        template = CommandTemplate()
        template.setup(self, job_dict)
        template.parse_parameter_description()

        self.job_dict = job_dict
        if "toolId" not in job_dict:
            raise InvalidRequestError("Submitted job %s lacks toolId" %
                                      job_dict["toolName"])
        self.tool = Tool.objects.get(id=job_dict["toolId"])
        if not self.tool.enabled:
            raise InvalidRequestError(
                "Can't process workflow with disabled tool '%s'" %
                self.tool.name)
        if not self.tool.does_user_have_access_to(self.user):
            raise InvalidRequestError(
                "Can't process workflow with inaccessible tool '%s'" %
                self.tool.name)

        # let's work out the highest copy level supported by this tool and store it in the job. This takes no account of the backends' capabilities;
        # that will be resolved later when the stagein is created during the walk
        self.preferred_stagein_method = 'link' if self.tool.link_supported else 'lcopy' if self.tool.lcopy_supported else 'copy'

        self.preferred_stageout_method = 'lcopy' if self.tool.lcopy_supported else 'copy'  # stageouts should never be linked. Only local copy or remote copy

        # cache job for later reference
        self.command_template = template.serialise()
        self.command = str(template)  # text description of command

        self.status = const.STATUS_PENDING
        self.stageout = "%s%s/" % (
            self.workflow.stageout, "%d - %s" %
            (self.order + 1, self.tool.get_display_name()))
        self.exec_backend = self.get_backend_uri(self.exec_credential)
        self.fs_backend = self.get_backend_uri(self.fs_credential)
        self.cpus = self.tool.cpus
        self.walltime = self.tool.walltime
        self.module = self.tool.module
        self.queue = self.tool.queue
        self.max_memory = self.tool.max_memory
        self.job_type = self.tool.job_type

        self.save()

    @transaction.atomic
    def create_tasks(self):
        job_logger = create_job_logger(logger, self.pk)
        logger.debug('----- creating tasks for Job %s -----' % self.pk)
        assert self.total_tasks() == 0, "Job already has tasks"

        updated = Job.objects.filter(pk=self.pk,
                                     status=const.STATUS_PENDING).update(
                                         status=const.JOB_STATUS_PROCESSING)
        if updated == 0:
            job_logger.info(
                "Another process_jobs() must have picked up job %s already" %
                self.pk)
            return

        self.update_dependencies()

        input_files = self.get_input_files()
        self.create_one_task_for_each_input_file(input_files)

        # there must be at least one task for every job
        if not self.total_tasks():
            job_logger.critical('No tasks for job: %s' % self.pk)
            raise Exception('No tasks for job: %s' % self.pk)

        # mark job as ready so it can be requested by a backend
        self.status = const.STATUS_READY
        self.save()
        self.make_tasks_ready()

        return self.total_tasks()

    def get_input_files(self):
        if self.template.command.is_select_file:
            return []
        input_files = [X for X in self.template.file_sets()]
        return input_files

    def create_one_task_for_each_input_file(self, input_files):
        logger.debug("job %s is having tasks created for %s input files" %
                     (self.pk, len(input_files)))
        if len(input_files) == 0:
            input_files = [None]

        # let's count up our batch_file_list to see how many files there are to process;
        # won't count tasks with file == None as these come from non-batch param jobs
        count = len(list(filter(lambda x: x is not None, input_files)))
        left_padded_with_zeros = "{0:0>%s}" % len(str(count))

        self.task_total = len(input_files)

        for task_num, input_file in enumerate(input_files, 1):
            task = EngineTask(job=self,
                              status=const.STATUS_PENDING,
                              start_time=datetime.datetime.now(),
                              task_num=task_num)

            task_name = left_padded_with_zeros.format(
                task_num) if count > 1 else ""
            task.add_task(input_file, task_name)

    def progress_score(self):
        tasks = Task.objects.filter(job=self)
        score = 0.0
        for task in tasks:
            score += task.percent_complete if task.percent_complete is not None else 0.0

        return score

    def total_tasks(self):
        tasknum = float(len(Task.objects.filter(job=self)))
        return tasknum

    def ready_tasks(self):
        return self.task_set.filter(status_requested__isnull=True,
                                    status_ready__isnull=False).order_by('id')

    def has_errored_tasks(self):
        return [
            X.error_msg for X in Task.objects.filter(job=self)
            if X.status == const.STATUS_ERROR
        ] != []

    def get_errored_tasks_messages(self):
        return [
            X.error_msg for X in Task.objects.filter(job=self)
            if X.status == const.STATUS_ERROR
        ]

    def as_dict(self):
        # TODO This will have to be able to generate the full JSON
        # In this step of the refactoring it will just get its JSON from the workflow
        # UPDATE CW - the following json.loads line is failing with unwalked workflows. Refactoring needs to be completed
        # HACK CW - short-circuit the function so the front end can get a response rather than an error.
        if not self.workflow.original_json:
            return {}
        workflow_dict = json.loads(self.workflow.original_json)
        job_id = int(self.order)
        job_dict = workflow_dict['jobs'][job_id]
        assert job_dict['jobId'] == job_id + 1  # jobs are 1-indexed in json

        job_dict['status'] = self.status
        job_dict['is_retrying'] = self.is_retrying
        job_dict['tasksComplete'] = float(self.progress_score())
        job_dict['tasksTotal'] = float(self.total_tasks())

        if self.status == const.STATUS_ERROR:
            job_dict['errorMessage'] = str(self.get_errored_tasks_messages())

        if self.stageout:
            job_dict['stageout'] = self.stageout
        return job_dict
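
Read end to end, the class implies a lifecycle of PENDING -> PROCESSING -> READY, driven by add_job() and then create_tasks() once dependencies clear. A hedged sketch of a driver loop; no such loop appears in these excerpts, so this is only an assumption about how process_jobs() (the name mentioned in the log message inside create_tasks) might use the class:

def process_jobs(pending_jobs):
    # Hypothetical driver built only from the methods shown above.
    for job in pending_jobs:
        if job.has_incomplete_dependencies():
            continue                # wait for upstream yabi:// outputs to complete
        job.create_tasks()          # PENDING -> PROCESSING -> READY (tasks READY too)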
Code Example #5
class CommandLineTemplateTest(unittest.TestCase):
    def setUp(self):
        demo_user = m.User.objects.get(name='demo')
        workflow = mommy.make('Workflow', user=demo_user)
        self.job = mommy.make('Job', workflow=workflow, order=0)
        self.td = mommy.make('ToolDesc', name='my-tool')
        self.tool = mommy.make('Tool', desc=self.td, path='tool.sh')
        combined_with_equals = ParameterSwitchUse.objects.get(
            display_text='combined with equals')
        value_only = ParameterSwitchUse.objects.get(display_text='valueOnly')
        mommy.make('ToolParameter',
                   tool=self.td,
                   switch="-arg1",
                   switch_use=combined_with_equals,
                   rank=2)
        mommy.make('ToolParameter',
                   tool=self.td,
                   switch="-arg2",
                   switch_use=value_only,
                   rank=1)
        mommy.make('ToolParameter',
                   tool=self.td,
                   switch="-arg3",
                   switch_use=value_only,
                   file_assignment='batch')

        self.template = CommandTemplate()
        self.job_1_dict = {
            "jobId": 1,
            "toolName": "my-tool",
            "toolId": self.tool.id,
            "parameterList": {
                "parameter": []
            }
        }

    def tearDown(self):
        self.job.workflow.delete()
        self.td.delete()

    def job_dict_with_params(self, *params):
        import copy
        # deepcopy: a shallow copy would share the nested parameterList dict with
        # self.job_1_dict, so assigning into it would mutate the fixture
        d = copy.deepcopy(self.job_1_dict)
        if params:
            d['parameterList']['parameter'] = params
        return d

    def render_command(self, job, job_dict, uri_conversion=None):
        self.template.setup(job, job_dict)
        self.template.parse_parameter_description()

        if uri_conversion is not None:
            self.template.set_uri_conversion(uri_conversion)

        return self.template.render()

    def test_no_params(self):
        job_dict = self.job_1_dict

        command = self.render_command(self.job, job_dict)

        self.assertEquals("tool.sh", command)

    def test_param_combined_with_equals(self):
        job_dict = self.job_dict_with_params({
            'switchName': '-arg1',
            'valid': True,
            'value': ['value']
        })

        command = self.render_command(self.job, job_dict)

        self.assertEqual('tool.sh -arg1=value', command)

    def test_param_value_only(self):
        job_dict = self.job_dict_with_params({
            'switchName': '-arg2',
            'valid': True,
            'value': ['a value']
        })

        command = self.render_command(self.job, job_dict)

        self.assertEquals("tool.sh 'a value'", command)

    def test_rank_respected(self):
        job_dict = self.job_dict_with_params(
            {
                'switchName': '-arg1',
                'valid': True,
                'value': ['value']
            }, {
                'switchName': '-arg2',
                'valid': True,
                'value': ['other value']
            })

        command = self.render_command(self.job, job_dict)

        self.assertEquals("tool.sh 'other value' -arg1=value", command)

    def test_direct_file_reference(self):
        job_dict = self.job_dict_with_params({
            'switchName': '-arg3',
            'valid': True,
            'value': [{
                'path': ['some', 'path'],
                'root': 'sftp://demo@localhost:22/',
                'type': 'file',
                'filename': 'a file.txt'
            }]
        })

        command = self.render_command(
            self.job,
            job_dict,
            uri_conversion='/tools/workdir/input/%(filename)s')

        self.assertEquals("tool.sh '/tools/workdir/input/a file.txt'", command)