コード例 #1
ファイル: Workflow.py プロジェクト: Yixf-Self/COSMOS2
    def add_task(self,
        Adds a new Task to the Workflow.  If the Task already exists (and was successful), return the successful Task stored in the database

        :param func func: A function which returns a string which will get converted to a shell script to be executed.  Func will not get called until
          all of its dependencies have completed.
        :param dict params: Parameters to `func`.  Must be jsonable so that it can be stored in the database.  Any Dependency objects will get resolved into
            a string, and the Dependency.task will be added to this Task's parents.
        :param list[Tasks] parents: A list of dependent Tasks.
        :param str uid: A unique identifier for this Task, primarily used for skipping  previously successful Tasks.
            If a Task with this stage_name and uid already exists in the database (and was successful), the
            database version will be returned and a new one will not be created.
        :param str stage_name: The name of the Stage to add this Task to.  Defaults to a title()ed __name__ of `func`.
        :param str drm: The drm to use for this Task (example 'local' or 'ge')
        from cosmos.models.Stage import Stage
        from cosmos import recursive_resolve_dependency
        from cosmos.api import Dependency

        # parents
        if isinstance(parents, types.GeneratorType):
            parents = list(parents)
        if parents is None:
            parents = []
        if isinstance(parents, Task):
            parents = [parents]

        # params
        if params is None:
            params = dict()
        for k, v in params.iteritems():
            # decompose `Dependency` objects to values and parents
            new_val, parent_tasks = recursive_resolve_dependency(v)

            params[k] = new_val
            parents.extend(parent_tasks - set(parents))

        # uid
        if uid is None:
            raise AssertionError, 'uid parameter must be specified'
            # Fix me assert params are all JSONable
            # uid = str(params)
            assert isinstance(uid, basestring), 'uid must be a string'

        # stage_name
        if stage_name is None:
            stage_name = str(func.__name__).replace('_', ' ').title().replace(
                ' ', '_')

        # Get the right Stage
        stage = only_one((s for s in self.stages if s.name == stage_name),
        if stage is None:
            stage = Stage(workflow=self, name=stage_name)

        # Check if task is already in stage
        task = stage.get_task(uid, None)

        if task is not None:
            # if task is already in stage, but unsuccessful, raise an error (duplicate params) since unsuccessful tasks
            # were already removed on workflow load
            if task.successful:
                return task
                # TODO check for duplicate params here?  would be a lot faster at Workflow.run
                raise ValueError(
                    'Duplicate uid, you have added a Task to Stage %s with the uid (unique identifier) `%s` twice.  '
                    'Task uids must be unique within the same Stage.' %
                    (stage_name, uid))
            # Create Task

            # input_map, output_map = io.get_io_map(task_func, task_params, parents, stage.name, out_dir, self.output_dir)
            # input_files = io.unpack_io_map(input_map)
            # output_files = io.unpack_io_map(output_map)
            sig = funcsigs.signature(func)

            # Check required parameters are specified
            # for keyword, parameter in sig.parameters.iteritems():
            # if parameter.default is funcsigs._empty and keyword not in params:
            #         raise AssertionError, 'Parameter %s is required for %s' % (keyword, func)

            def params_or_signature_default_or(name, default):
                if name in params:
                    return params[name]
                if name in sig.parameters:
                    param_default = sig.parameters[name].default
                    if param_default is funcsigs._empty:
                        return default
                        return param_default
                return default

            input_map = dict()
            output_map = dict()

            for keyword, param in sig.parameters.iteritems():
                if keyword.startswith('in_'):
                    v = params.get(keyword, param.default)
                    assert v != funcsigs._empty, 'parameter %s for %s is required' % (
                        param, func)
                    input_map[keyword] = v
                elif keyword.startswith('out_'):
                    v = params.get(keyword, param.default)
                    assert v != funcsigs._empty, 'parameter %s for %s is required' % (
                        param, func)
                    output_map[keyword] = v

            task = Task(
                drm=drm or self.cosmos_app.default_drm,
                core_req=params_or_signature_default_or('core_req', 1),
                    'must_succeed', True),
                mem_req=params_or_signature_default_or('mem_req', None),
                time_req=params_or_signature_default_or('time_req', None))

            task.cmd_fxn = func
            # task.input_map = input_map
            # task.output_map = output_map
            # task.call_kwargs = call_kwargs

        # Add Stage Dependencies
        for p in parents:
            if p.stage not in stage.parents:


        return task
コード例 #2
    def add_task(
        Adds a new Task to the Workflow.  If the Task already exists (and was successful), return the successful Task stored in the database

        :param callable func: A function which returns a string which will get converted to a shell script to be executed.  `func` will not get called until
          all of its dependencies have completed.
        :param dict params: Parameters to `func`.  Must be jsonable so that it can be stored in the database.  Any Dependency objects will get resolved into
            a string, and the Dependency.task will be added to this Task's parents.
        :param list[Tasks] parents: A list of dependent Tasks.
        :param str uid: A unique identifier for this Task, primarily used for skipping  previously successful Tasks.
            If a Task with this stage_name and uid already exists in the database (and was successful), the
            database version will be returned and a new one will not be created.
        :param str stage_name: The name of the Stage to add this Task to.  Defaults to `func.__name__`.
        :param str drm: The drm to use for this Task (example 'local', 'ge' or 'drmaa:lsf').  Defaults to the `default_drm` parameter of :meth:`Cosmos.start`
        :param job_class: The name of a job_class to submit to; defaults to the `default_job_class` parameter of :meth:`Cosmos.start`
        :param queue: The name of a queue to submit to; defaults to the `default_queue` parameter of :meth:`Cosmos.start`
        :param bool must_succeed: Default True.  If False, the Workflow will not fail if this Task does not succeed.  Dependent Jobs will not be executed.
        :param bool time_req: The time requirement; will set the Task.time_req attribute which is intended to be used by :func:`get_submit_args` to request resources.
        :param int core_req: Number of cpus required for this Task.  Can also be set in the `params` dict or the default value of the Task function signature, but this value takes precedence.
            Warning!  In future versions, this will be the only way to set it.
        :param int mem_req: Number of MB of RAM required for this Task.   Can also be set in the `params` dict or the default value of the Task function signature, but this value takes precedence.
            Warning!  In future versions, this will be the only way to set it.
        :param int gpu_req: Number of gpus required for this Task.
        :param int max_attempts: The maximum number of times to retry a failed job.  Defaults to the `default_max_attempts` parameter of :meth:`Cosmos.start`
        :param bool noop: Task is a No-op and will always be marked as successful.
        :param dict drm_options: Options for Distributed Resource Management (cluster).
        :param dict environment_variables: Environment variables to pass to the DRM (if supported).
        :param str if_duplicate: If "raise", raises an error if a Task with the same UID has already been added to this
          Workflow.  If "return", return that Task, allowing for an easy way to avoid duplicate work.
        :rtype: cosmos.api.Task
        # Avoid cyclical import dependencies
        from cosmos.job.drm.DRM_Base import DRM
        from cosmos.models.Stage import Stage
        from cosmos import recursive_resolve_dependency

        # parents
        if parents is None:
            parents = []
        elif isinstance(parents, Task):
            parents = [parents]
            parents = list(parents)

        # params
        if params is None:
            params = dict()
        for k, v in list(params.items()):
            # decompose `Dependency` objects to values and parents
            new_val, parent_tasks = recursive_resolve_dependency(v)

            params[k] = new_val
            parents.extend(parent_tasks - set(parents))

        # uid
        if uid is None:
            raise AssertionError("uid parameter must be specified")
            # Fix me assert params are all JSONable
            # uid = str(params)
            assert isinstance(uid, str), "uid must be a string"

        if stage_name is None:
            stage_name = str(func.__name__)

        # Get the right Stage
        stage = only_one((s for s in self.stages if s.name == stage_name),
        if stage is None:
            stage = Stage(workflow=self,

        # Check if task is already in stage
        task = stage.get_task(uid, None)

        if task is not None:
            # if task is already in stage, but unsuccessful, raise an error (duplicate params) since unsuccessful tasks
            # were already removed on workflow load
            if task.successful:
                # If the user manually edited the dag and this is a resume, parents might need to be re-added

                for p in parents:
                    if p.stage not in stage.parents:

                return task
                if if_duplicate == "raise":
                    raise DuplicateUid(
                        "Duplicate uid, you have added a Task to Stage %s with the uid (unique identifier) `%s` twice.  "
                        "Task uids must be unique within the same Stage." %
                        (stage_name, uid))
                elif if_duplicate == "return":
                    if task.params != params:
                        raise InvalidParams(
                            f"Tried to add a task with the same uid, but different parameters."
                    return task
                    raise ValueError(f"{if_duplicate} is not valid")
            # Create Task
            sig = funcsigs.signature(func)

            def params_or_signature_default_or(name, default):
                if name in params:
                    return params[name]
                if name in sig.parameters:
                    param_default = sig.parameters[name].default
                    if param_default is funcsigs._empty:
                        return default
                        return param_default
                return default

            task = Task(
                drm=drm if drm is not None else self.cosmos_app.default_drm,
                job_class=job_class if job_class is not None else
                if queue is not None else self.cosmos_app.default_queue,
                core_req=core_req if core_req is not None else
                params_or_signature_default_or("core_req", 1),
                mem_req=mem_req if mem_req is not None else
                params_or_signature_default_or("mem_req", None),
                if time_req is not None else self.cosmos_app.default_time_req,
                max_attempts=max_attempts if max_attempts is not None else
                gpu_req=gpu_req if gpu_req is not None else
                params_or_signature_default_or("gpu_req", 0),
                if environment_variables is not None else

            task.cmd_fxn = func

            if drm_options is None:
                task.drm_options = {}
                task.drm_options = drm_options
            # use default for any keys not set
            if self.cosmos_app.default_drm_options is not None:
                for key, val in list(
                    if key not in task.drm_options:
                        task.drm_options[key] = val

            DRM.validate_drm_options(task.drm, task.drm_options)

        # Add Stage Dependencies
        for p in parents:
            if p.stage not in stage.parents:


        return task
コード例 #3
ファイル: Workflow.py プロジェクト: p7k/COSMOS2
    def add_task(self,
        Adds a new Task to the Workflow.  If the Task already exists (and was successful), return the successful Task stored in the database

        :param callable func: A function which returns a string which will get converted to a shell script to be executed.  `func` will not get called until
          all of its dependencies have completed.
        :param dict params: Parameters to `func`.  Must be jsonable so that it can be stored in the database.  Any Dependency objects will get resolved into
            a string, and the Dependency.task will be added to this Task's parents.
        :param list[Tasks] parents: A list of dependent Tasks.
        :param str uid: A unique identifier for this Task, primarily used for skipping  previously successful Tasks.
            If a Task with this stage_name and uid already exists in the database (and was successful), the
            database version will be returned and a new one will not be created.
        :param str stage_name: The name of the Stage to add this Task to.  Defaults to `func.__name__`.
        :param str drm: The drm to use for this Task (example 'local', 'ge' or 'drmaa:lsf').  Defaults to the `default_drm` parameter of :meth:`Cosmos.start`
        :param job_class: The name of a job_class to submit to; defaults to the `default_job_class` parameter of :meth:`Cosmos.start`
        :param queue: The name of a queue to submit to; defaults to the `default_queue` parameter of :meth:`Cosmos.start`
        :param bool must_succeed: Default True.  If False, the Workflow will not fail if this Task does not succeed.  Dependent Jobs will not be executed.
        :param bool time_req: The time requirement; will set the Task.time_req attribute which is intended to be used by :func:`get_submit_args` to request resources.
        :param int core_req: Number of cpus required for this Task.  Can also be set in the `params` dict or the default value of the Task function signature, but this value takes precedence.
            Warning!  In future versions, this will be the only way to set it.
        :param int mem_req: Number of MB of RAM required for this Task.   Can also be set in the `params` dict or the default value of the Task function signature, but this value takes precedence.
            Warning!  In future versions, this will be the only way to set it.
        :param int max_attempts: The maximum number of times to retry a failed job.  Defaults to the `default_max_attempts` parameter of :meth:`Cosmos.start`
        :rtype: cosmos.api.Task
        # Avoid cyclical import dependencies
        from cosmos.job.drm.DRM_Base import DRM
        from cosmos.models.Stage import Stage
        from cosmos import recursive_resolve_dependency

        # parents
        if parents is None:
            parents = []
        elif isinstance(parents, Task):
            parents = [parents]
            parents = list(parents)

        # params
        if params is None:
            params = dict()
        for k, v in params.iteritems():
            # decompose `Dependency` objects to values and parents
            new_val, parent_tasks = recursive_resolve_dependency(v)

            params[k] = new_val
            parents.extend(parent_tasks - set(parents))

        # uid
        if uid is None:
            raise AssertionError, 'uid parameter must be specified'
            # Fix me assert params are all JSONable
            # uid = str(params)
            assert isinstance(uid, basestring), 'uid must be a string'

        if stage_name is None:
            stage_name = str(func.__name__)

        # Get the right Stage
        stage = only_one((s for s in self.stages if s.name == stage_name),
        if stage is None:
            stage = Stage(workflow=self,

        # Check if task is already in stage
        task = stage.get_task(uid, None)

        if task is not None:
            # if task is already in stage, but unsuccessful, raise an error (duplicate params) since unsuccessful tasks
            # were already removed on workflow load
            if task.successful:
                # If the user manually edited the dag and this is a resume, parents might need to be re-added

                for p in parents:
                    if p.stage not in stage.parents:

                return task
                # TODO check for duplicate params here?  would be a lot faster at Workflow.run
                raise ValueError(
                    'Duplicate uid, you have added a Task to Stage %s with the uid (unique identifier) `%s` twice.  '
                    'Task uids must be unique within the same Stage.' %
                    (stage_name, uid))
            # Create Task
            sig = funcsigs.signature(func)

            def params_or_signature_default_or(name, default):
                if name in params:
                    return params[name]
                if name in sig.parameters:
                    param_default = sig.parameters[name].default
                    if param_default is funcsigs._empty:
                        return default
                        return param_default
                return default

            input_map = dict()
            output_map = dict()

            for keyword, param in sig.parameters.iteritems():
                if keyword.startswith('in_'):
                    v = params.get(keyword, param.default)
                    assert v != funcsigs._empty, 'parameter %s for %s is required' % (
                        param, func)
                    input_map[keyword] = v
                elif keyword.startswith('out_'):
                    v = params.get(keyword, param.default)
                    assert v != funcsigs._empty, 'parameter %s for %s is required' % (
                        param, func)
                    output_map[keyword] = v

            task = Task(
                drm=drm if drm is not None else self.cosmos_app.default_drm,
                job_class=job_class if job_class is not None else
                if queue is not None else self.cosmos_app.default_queue,
                core_req=core_req if core_req is not None else
                params_or_signature_default_or('core_req', 1),
                mem_req=mem_req if mem_req is not None else
                params_or_signature_default_or('mem_req', None),
                if time_req is not None else self.cosmos_app.default_time_req,
                max_attempts=max_attempts if max_attempts is not None else

            task.cmd_fxn = func

            task.drm_options = drm_options if drm_options is not None else self.cosmos_app.default_drm_options
            DRM.validate_drm_options(task.drm, task.drm_options)

        # Add Stage Dependencies
        for p in parents:
            if p.stage not in stage.parents:


        return task
コード例 #4
ファイル: Workflow.py プロジェクト: LPM-HMS/COSMOS2
    def add_task(self, func, params=None, parents=None, stage_name=None, uid=None, drm=None,
                 queue=None, must_succeed=True, time_req=None, core_req=None, mem_req=None,
                 max_attempts=None, noop=False, job_class=None, drm_options=None):
        Adds a new Task to the Workflow.  If the Task already exists (and was successful), return the successful Task stored in the database

        :param callable func: A function which returns a string which will get converted to a shell script to be executed.  `func` will not get called until
          all of its dependencies have completed.
        :param dict params: Parameters to `func`.  Must be jsonable so that it can be stored in the database.  Any Dependency objects will get resolved into
            a string, and the Dependency.task will be added to this Task's parents.
        :param list[Tasks] parents: A list of dependent Tasks.
        :param str uid: A unique identifier for this Task, primarily used for skipping  previously successful Tasks.
            If a Task with this stage_name and uid already exists in the database (and was successful), the
            database version will be returned and a new one will not be created.
        :param str stage_name: The name of the Stage to add this Task to.  Defaults to `func.__name__`.
        :param str drm: The drm to use for this Task (example 'local', 'ge' or 'drmaa:lsf').  Defaults to the `default_drm` parameter of :meth:`Cosmos.start`
        :param job_class: The name of a job_class to submit to; defaults to the `default_job_class` parameter of :meth:`Cosmos.start`
        :param queue: The name of a queue to submit to; defaults to the `default_queue` parameter of :meth:`Cosmos.start`
        :param bool must_succeed: Default True.  If False, the Workflow will not fail if this Task does not succeed.  Dependent Jobs will not be executed.
        :param bool time_req: The time requirement; will set the Task.time_req attribute which is intended to be used by :func:`get_submit_args` to request resources.
        :param int core_req: Number of cpus required for this Task.  Can also be set in the `params` dict or the default value of the Task function signature, but this value takes precedence.
            Warning!  In future versions, this will be the only way to set it.
        :param int mem_req: Number of MB of RAM required for this Task.   Can also be set in the `params` dict or the default value of the Task function signature, but this value takes precedence.
            Warning!  In future versions, this will be the only way to set it.
        :param int max_attempts: The maximum number of times to retry a failed job.  Defaults to the `default_max_attempts` parameter of :meth:`Cosmos.start`
        :rtype: cosmos.api.Task
        # Avoid cyclical import dependencies
        from cosmos.job.drm.DRM_Base import DRM
        from cosmos.models.Stage import Stage
        from cosmos import recursive_resolve_dependency

        # parents
        if parents is None:
            parents = []
        elif isinstance(parents, Task):
            parents = [parents]
            parents = list(parents)

        # params
        if params is None:
            params = dict()
        for k, v in params.iteritems():
            # decompose `Dependency` objects to values and parents
            new_val, parent_tasks = recursive_resolve_dependency(v)

            params[k] = new_val
            parents.extend(parent_tasks - set(parents))

        # uid
        if uid is None:
            raise AssertionError, 'uid parameter must be specified'
            # Fix me assert params are all JSONable
            # uid = str(params)
            assert isinstance(uid, basestring), 'uid must be a string'

        if stage_name is None:
            stage_name = str(func.__name__)

        # Get the right Stage
        stage = only_one((s for s in self.stages if s.name == stage_name), None)
        if stage is None:
            stage = Stage(workflow=self, name=stage_name, status=StageStatus.no_attempt)

        # Check if task is already in stage
        task = stage.get_task(uid, None)

        if task is not None:
            # if task is already in stage, but unsuccessful, raise an error (duplicate params) since unsuccessful tasks
            # were already removed on workflow load
            if task.successful:
                # If the user manually edited the dag and this is a resume, parents might need to be re-added

                for p in parents:
                    if p.stage not in stage.parents:

                return task
                # TODO check for duplicate params here?  would be a lot faster at Workflow.run
                raise ValueError('Duplicate uid, you have added a Task to Stage %s with the uid (unique identifier) `%s` twice.  '
                                 'Task uids must be unique within the same Stage.' % (stage_name, uid))
            # Create Task
            sig = funcsigs.signature(func)

            def params_or_signature_default_or(name, default):
                if name in params:
                    return params[name]
                if name in sig.parameters:
                    param_default = sig.parameters[name].default
                    if param_default is funcsigs._empty:
                        return default
                        return param_default
                return default

            task = Task(stage=stage,
                        drm=drm if drm is not None else self.cosmos_app.default_drm,
                        job_class=job_class if job_class is not None else self.cosmos_app.default_job_class,
                        queue=queue if queue is not None else self.cosmos_app.default_queue,
                        core_req=core_req if core_req is not None else params_or_signature_default_or('core_req', 1),
                        mem_req=mem_req if mem_req is not None else params_or_signature_default_or('mem_req', None),
                        time_req=time_req if time_req is not None else self.cosmos_app.default_time_req,
                        max_attempts=max_attempts if max_attempts is not None else self.cosmos_app.default_max_attempts,

            task.cmd_fxn = func

            task.drm_options = drm_options if drm_options is not None else self.cosmos_app.default_drm_options
            DRM.validate_drm_options(task.drm, task.drm_options)

        # Add Stage Dependencies
        for p in parents:
            if p.stage not in stage.parents:


        return task