def __init__(self, get_submit_args, log_out_dir_func=default_task_log_output_dir, cmd_wrapper=None):
    """
    Set up the DRM registry and the (initially empty) task bookkeeping lists.

    :param callable get_submit_args: stored as ``self.get_submit_args``.
    :param callable log_out_dir_func: stored as ``self.log_out_dir_func``.
    :param cmd_wrapper: stored as ``self.cmd_wrapper``; may be None.
    """
    # One instance per direct DRM subclass, keyed by the subclass' `name`.
    # Each instance is handed this object as its only constructor argument.
    drm_by_name = {}
    for drm_cls in DRM.__subclasses__():
        drm_name = drm_cls.name
        drm_by_name[drm_name] = drm_cls(self)
    self.drms = drm_by_name

    # Task bookkeeping: three independent, initially empty lists.
    self.tasks, self.running_tasks, self.dead_tasks = [], [], []

    # Caller-supplied hooks are kept as given.
    self.log_out_dir_func = log_out_dir_func
    self.cmd_wrapper = cmd_wrapper
    self.get_submit_args = get_submit_args
def __init__(self, get_submit_args, log_out_dir_func=default_task_log_output_dir, cmd_wrapper=None):
    """
    Build the registry of DRM instances and initialise the task lists.

    :param callable get_submit_args: kept on the instance as ``get_submit_args``.
    :param callable log_out_dir_func: kept on the instance as ``log_out_dir_func``.
    :param cmd_wrapper: kept on the instance as ``cmd_wrapper``; may be None.
    """
    # Every direct subclass of DRM is instantiated once (receiving this
    # object) and registered under its `name` attribute.
    self.drms = dict(
        (subclass.name, subclass(self))
        for subclass in DRM.__subclasses__()
    )

    # Hooks supplied by the caller.
    self.get_submit_args = get_submit_args
    self.log_out_dir_func = log_out_dir_func
    self.cmd_wrapper = cmd_wrapper

    # Nothing has been submitted yet.
    self.tasks = list()
    self.running_tasks = list()
    self.dead_tasks = list()
def add_task(self, func, params=None, parents=None, stage_name=None, uid=None, drm=None,
             queue=None, must_succeed=True, time_req=None, core_req=None, mem_req=None,
             max_attempts=None, noop=False, job_class=None, drm_options=None):
    """
    Adds a new Task to the Workflow.  If the Task already exists (and was successful), return the successful
    Task stored in the database.

    :param callable func: A function which returns a string which will get converted to a shell script to be
        executed.  `func` will not get called until all of its dependencies have completed.
    :param dict params: Parameters to `func`.  Must be jsonable so that it can be stored in the database.  Any
        Dependency objects will get resolved into a string, and the Dependency.task will be added to this
        Task's parents.  The dict is updated in place with the resolved values.
    :param list[Tasks] parents: A list of dependent Tasks (a single Task is also accepted).
    :param str uid: A unique identifier for this Task, primarily used for skipping previously successful Tasks.
        If a Task with this stage_name and uid already exists in the database (and was successful), the
        database version will be returned and a new one will not be created.
    :param str stage_name: The name of the Stage to add this Task to.  Defaults to `func.__name__`.
    :param str drm: The drm to use for this Task (example 'local', 'ge' or 'drmaa:lsf').  Defaults to
        ``cosmos_app.default_drm``.
    :param job_class: The name of a job_class to submit to; defaults to ``cosmos_app.default_job_class``.
    :param queue: The name of a queue to submit to; defaults to ``cosmos_app.default_queue``.
    :param bool must_succeed: Default True.  If False, the Workflow will not fail if this Task does not
        succeed.  Dependent Jobs will not be executed.
    :param time_req: The time requirement; will set the Task.time_req attribute which is intended to be used
        by :func:`get_submit_args` to request resources.  Defaults to ``cosmos_app.default_time_req``.
    :param int core_req: Number of cpus required for this Task.  Can also be set in the `params` dict or the
        default value of the Task function signature, but this value takes precedence.
        Warning!  In future versions, this will be the only way to set it.
    :param int mem_req: Number of MB of RAM required for this Task.  Can also be set in the `params` dict or
        the default value of the Task function signature, but this value takes precedence.
        Warning!  In future versions, this will be the only way to set it.
    :param int max_attempts: The maximum number of times to retry a failed job.  Defaults to
        ``cosmos_app.default_max_attempts``.
    :param bool noop: Passed to the Task as its ``NOOP`` flag.
    :param dict drm_options: DRM options for this Task; defaults to ``cosmos_app.default_drm_options``.
    :raises AssertionError: if `uid` is missing or not a string, or if an ``in_``/``out_`` parameter of
        `func` has neither a value in `params` nor a default.
    :raises ValueError: if an unsuccessful Task with the same uid is already in the Stage.
    :rtype: cosmos.api.Task
    """
    # Avoid cyclical import dependencies
    from cosmos.job.drm.DRM_Base import DRM
    from cosmos.models.Stage import Stage
    from cosmos import recursive_resolve_dependency

    # `basestring` only exists on Python 2; fall back to `str` elsewhere so
    # the uid check behaves the same on both major versions.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str

    # parents
    if parents is None:
        parents = []
    elif isinstance(parents, Task):
        parents = [parents]
    else:
        parents = list(parents)

    # params
    if params is None:
        params = dict()
    # Iterate over a snapshot: values are reassigned inside the loop.
    for k, v in list(params.items()):
        # decompose `Dependency` objects to values and parents
        new_val, parent_tasks = recursive_resolve_dependency(v)
        params[k] = new_val
        parents.extend(parent_tasks - set(parents))

    # uid -- raised explicitly (rather than via `assert`) so the checks
    # survive `python -O`; the exception type is unchanged.
    if uid is None:
        raise AssertionError('uid parameter must be specified')
    if not isinstance(uid, string_types):
        raise AssertionError('uid must be a string')

    if stage_name is None:
        stage_name = str(func.__name__)

    # Get the right Stage
    stage = only_one((s for s in self.stages if s.name == stage_name), None)
    if stage is None:
        stage = Stage(workflow=self, name=stage_name, status=StageStatus.no_attempt)
        self.session.add(stage)

    # Check if task is already in stage
    task = stage.get_task(uid, None)

    if task is not None:
        # if task is already in stage, but unsuccessful, raise an error (duplicate params) since unsuccessful
        # tasks were already removed on workflow load
        if not task.successful:
            # TODO check for duplicate params here?  would be a lot faster at Workflow.run
            raise ValueError(
                'Duplicate uid, you have added a Task to Stage %s with the uid (unique identifier) `%s` twice.  '
                'Task uids must be unique within the same Stage.' % (stage_name, uid))

        # If the user manually edited the dag and this a resume, parents might need to be re-added
        task.parents.extend(set(parents).difference(set(task.parents)))
        for p in parents:
            if p.stage not in stage.parents:
                stage.parents.append(p.stage)
        return task

    # Create Task
    sig = funcsigs.signature(func)

    def params_or_signature_default_or(name, default):
        # Precedence: explicit entry in `params`, then the default in
        # `func`'s signature, then the supplied fallback.
        if name in params:
            return params[name]
        if name in sig.parameters:
            param_default = sig.parameters[name].default
            if param_default is not funcsigs._empty:
                return param_default
        return default

    # Collect the values of `in_*` / `out_*` parameters of `func`.
    input_map = dict()
    output_map = dict()
    for keyword, param in sig.parameters.items():
        if keyword.startswith('in_'):
            target_map = input_map
        elif keyword.startswith('out_'):
            target_map = output_map
        else:
            continue
        v = params.get(keyword, param.default)
        # `_empty` is a sentinel, so compare by identity.
        if v is funcsigs._empty:
            raise AssertionError('parameter %s for %s is required' % (param, func))
        target_map[keyword] = v

    task = Task(
        stage=stage,
        params=params,
        parents=parents,
        input_map=input_map,
        output_map=output_map,
        uid=uid,
        drm=drm if drm is not None else self.cosmos_app.default_drm,
        job_class=job_class if job_class is not None else self.cosmos_app.default_job_class,
        queue=queue if queue is not None else self.cosmos_app.default_queue,
        must_succeed=must_succeed,
        core_req=core_req if core_req is not None else params_or_signature_default_or('core_req', 1),
        mem_req=mem_req if mem_req is not None else params_or_signature_default_or('mem_req', None),
        time_req=time_req if time_req is not None else self.cosmos_app.default_time_req,
        successful=False,
        max_attempts=max_attempts if max_attempts is not None else self.cosmos_app.default_max_attempts,
        attempt=1,
        NOOP=noop,
    )

    task.cmd_fxn = func

    task.drm_options = drm_options if drm_options is not None else self.cosmos_app.default_drm_options
    DRM.validate_drm_options(task.drm, task.drm_options)

    # Add Stage Dependencies
    for p in parents:
        if p.stage not in stage.parents:
            stage.parents.append(p.stage)

    # Hold a reference to the new Task on the Workflow.
    self.dont_garbage_collect.append(task)

    return task
def __init__(self, database_url='sqlite:///:memory:', get_submit_args=default_get_submit_args,
             default_drm='local', default_drm_options=None, default_queue=None,
             default_time_req=None, default_max_attempts=1, flask_app=None,
             default_job_class=None):
    """
    Configure the application: Flask app, database session and per-Task defaults.

    :param str database_url: A `sqlalchemy database url
        <http://docs.sqlalchemy.org/en/latest/core/engines.html>`_.  Must contain ``://``.
        ex: sqlite:///home/user/sqlite.db or mysql://user:pass@localhost/database_name or
        postgresql+psycopg2://user:pass@localhost/database_name
    :param callable get_submit_args: a function that returns arguments to be passed to the job submitter,
        like resource requirements or the queue to submit to.  See
        :func:`cosmos.api.default_get_submit_args` for details
    :param str default_drm: The Default DRM to use (ex 'local', 'lsf', or 'ge').  The part before the
        first ``:`` must be one of ``DRM.get_drm_names()``.
    :param dict default_drm_options: Stored as ``self.default_drm_options``; None becomes ``{}``.
    :param default_queue: Stored as ``self.default_queue``.
    :param default_time_req: Stored as ``self.default_time_req``.
    :param int default_max_attempts: Stored as ``self.default_max_attempts``.
    :param flask.Flask flask_app: A Flask application instance for the web interface.  The default
        behavior is to create one (with a random secret key).
    :param default_job_class: Stored as ``self.default_job_class``.
    """
    if default_drm_options is None:
        default_drm_options = {}

    # Avoid cyclical import dependencies
    from cosmos.job.drm.DRM_Base import DRM

    # Only the prefix before the first ':' is checked against the known DRMs.
    drm_name = default_drm.split(':')[0]
    assert drm_name in DRM.get_drm_names(), 'unsupported drm: %s' % drm_name
    assert '://' in database_url, 'Invalid database_url: %s' % database_url

    # Use the caller's Flask app when given, otherwise create a private one.
    if not flask_app:
        flask_app = Flask(__name__)
        flask_app.secret_key = os.urandom(24)
    self.flask_app = flask_app

    self.get_submit_args = get_submit_args

    # NOTE: the timestamp is taken once, here, not each time a template renders.
    self.flask_app.jinja_env.globals['time_now'] = datetime.now()

    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker, scoped_session
    from sqlalchemy.ext.declarative import declarative_base

    engine = create_engine(database_url, convert_unicode=True)
    session_factory = sessionmaker(autocommit=False, autoflush=False, bind=engine)
    self.session = scoped_session(session_factory)

    Base = declarative_base()
    Base.query = self.session.query_property()

    @self.flask_app.teardown_appcontext
    def shutdown_session(exception=None):
        # Remove the scoped session when the Flask app context is torn down.
        self.session.remove()

    # Defaults that are read back later through this object.
    self.default_drm = default_drm
    self.default_drm_options = default_drm_options
    self.default_job_class = default_job_class
    self.default_queue = default_queue
    self.default_max_attempts = default_max_attempts
    self.default_time_req = default_time_req
def add_task(
    self,
    func,
    params=None,
    parents=None,
    stage_name=None,
    uid=None,
    drm=None,
    queue=None,
    must_succeed=True,
    time_req=None,
    core_req=None,
    mem_req=None,
    gpu_req=None,
    max_attempts=None,
    noop=False,
    job_class=None,
    drm_options=None,
    environment_variables=None,
    if_duplicate="raise",
):
    """
    Adds a new Task to the Workflow.  If the Task already exists (and was successful), return the successful
    Task stored in the database.

    :param callable func: A function which returns a string which will get converted to a shell script to be
        executed.  `func` will not get called until all of its dependencies have completed.
    :param dict params: Parameters to `func`.  Must be jsonable so that it can be stored in the database.  Any
        Dependency objects will get resolved into a string, and the Dependency.task will be added to this
        Task's parents.  The dict is updated in place with the resolved values.
    :param list[Tasks] parents: A list of dependent Tasks (a single Task is also accepted).
    :param str uid: A unique identifier for this Task, primarily used for skipping previously successful Tasks.
        If a Task with this stage_name and uid already exists in the database (and was successful), the
        database version will be returned and a new one will not be created.
    :param str stage_name: The name of the Stage to add this Task to.  Defaults to `func.__name__`.
    :param str drm: The drm to use for this Task (example 'local', 'ge' or 'drmaa:lsf').  Defaults to
        ``cosmos_app.default_drm``.
    :param job_class: The name of a job_class to submit to; defaults to ``cosmos_app.default_job_class``.
    :param queue: The name of a queue to submit to; defaults to ``cosmos_app.default_queue``.
    :param bool must_succeed: Default True.  If False, the Workflow will not fail if this Task does not
        succeed.  Dependent Jobs will not be executed.
    :param time_req: The time requirement; will set the Task.time_req attribute which is intended to be used
        by :func:`get_submit_args` to request resources.  Defaults to ``cosmos_app.default_time_req``.
    :param int core_req: Number of cpus required for this Task.  Can also be set in the `params` dict or the
        default value of the Task function signature, but this value takes precedence.
        Warning!  In future versions, this will be the only way to set it.
    :param int mem_req: Number of MB of RAM required for this Task.  Can also be set in the `params` dict or
        the default value of the Task function signature, but this value takes precedence.
        Warning!  In future versions, this will be the only way to set it.
    :param int gpu_req: Number of gpus required for this Task.
    :param int max_attempts: The maximum number of times to retry a failed job.  Defaults to
        ``cosmos_app.default_max_attempts``.
    :param bool noop: Task is a No-op and will always be marked as successful.
    :param dict drm_options: Options for Distributed Resource Management (cluster).  Keys missing here are
        filled in from ``cosmos_app.default_drm_options``; the dict passed in is not modified.
    :param dict environment_variables: Environment variables to pass to the DRM (if supported).  Defaults to
        ``cosmos_app.default_environment_variables``.
    :param str if_duplicate: Consulted only when an unsuccessful Task with the same uid is already in the
        Stage.  If "raise", raises :class:`DuplicateUid`.  If "return", returns that Task (its params must
        equal `params`), allowing for an easy way to avoid duplicate work.
    :raises AssertionError: if `uid` is missing or not a string.
    :raises DuplicateUid: on a duplicate uid when ``if_duplicate == "raise"``.
    :raises InvalidParams: on a duplicate uid with different params when ``if_duplicate == "return"``.
    :raises ValueError: on a duplicate uid when `if_duplicate` is neither "raise" nor "return".
    :rtype: cosmos.api.Task
    """
    # Avoid cyclical import dependencies
    from cosmos.job.drm.DRM_Base import DRM
    from cosmos.models.Stage import Stage
    from cosmos import recursive_resolve_dependency

    # parents
    if parents is None:
        parents = []
    elif isinstance(parents, Task):
        parents = [parents]
    else:
        parents = list(parents)

    # params
    if params is None:
        params = dict()
    # Iterate over a snapshot: values are reassigned inside the loop.
    for k, v in list(params.items()):
        # decompose `Dependency` objects to values and parents
        new_val, parent_tasks = recursive_resolve_dependency(v)
        params[k] = new_val
        parents.extend(parent_tasks - set(parents))

    # uid -- raised explicitly (rather than via `assert`) so the checks
    # survive `python -O`; the exception type is unchanged.
    if uid is None:
        raise AssertionError("uid parameter must be specified")
    if not isinstance(uid, str):
        raise AssertionError("uid must be a string")

    if stage_name is None:
        stage_name = str(func.__name__)

    # Get the right Stage
    stage = only_one((s for s in self.stages if s.name == stage_name), None)
    if stage is None:
        stage = Stage(workflow=self, name=stage_name, status=StageStatus.no_attempt)
        self.session.add(stage)

    # Check if task is already in stage
    task = stage.get_task(uid, None)

    if task is not None:
        if task.successful:
            # If the user manually edited the dag and this a resume, parents might need to be re-added
            task.parents.extend(set(parents).difference(set(task.parents)))
            for p in parents:
                if p.stage not in stage.parents:
                    stage.parents.append(p.stage)
            return task

        # The existing Task is unsuccessful: the caller chooses how
        # a duplicate uid is handled.
        if if_duplicate == "raise":
            raise DuplicateUid(
                "Duplicate uid, you have added a Task to Stage %s with the uid (unique identifier) `%s` twice.  "
                "Task uids must be unique within the same Stage." % (stage_name, uid)
            )
        if if_duplicate == "return":
            if task.params != params:
                raise InvalidParams("Tried to add a task with the same uid, but different parameters.")
            return task
        raise ValueError(f"{if_duplicate} is not valid")

    # Create Task
    sig = funcsigs.signature(func)

    def params_or_signature_default_or(name, default):
        # Precedence: explicit entry in `params`, then the default in
        # `func`'s signature, then the supplied fallback.
        if name in params:
            return params[name]
        if name in sig.parameters:
            param_default = sig.parameters[name].default
            if param_default is not funcsigs._empty:
                return param_default
        return default

    task = Task(
        stage=stage,
        params=params,
        parents=parents,
        uid=uid,
        drm=drm if drm is not None else self.cosmos_app.default_drm,
        job_class=job_class if job_class is not None else self.cosmos_app.default_job_class,
        queue=queue if queue is not None else self.cosmos_app.default_queue,
        must_succeed=must_succeed,
        core_req=core_req if core_req is not None else params_or_signature_default_or("core_req", 1),
        mem_req=mem_req if mem_req is not None else params_or_signature_default_or("mem_req", None),
        time_req=time_req if time_req is not None else self.cosmos_app.default_time_req,
        successful=False,
        max_attempts=max_attempts if max_attempts is not None else self.cosmos_app.default_max_attempts,
        attempt=1,
        NOOP=noop,
        gpu_req=gpu_req if gpu_req is not None else params_or_signature_default_or("gpu_req", 0),
        environment_variables=environment_variables
        if environment_variables is not None
        else self.cosmos_app.default_environment_variables,
    )

    task.cmd_fxn = func

    # Work on a copy so that filling in defaults below never writes into
    # the dict the caller passed in (which may be reused for other Tasks).
    task.drm_options = {} if drm_options is None else dict(drm_options)

    # use default for any keys not set
    if self.cosmos_app.default_drm_options is not None:
        for key, val in self.cosmos_app.default_drm_options.items():
            if key not in task.drm_options:
                task.drm_options[key] = val

    DRM.validate_drm_options(task.drm, task.drm_options)

    # Add Stage Dependencies
    for p in parents:
        if p.stage not in stage.parents:
            stage.parents.append(p.stage)

    # Hold a reference to the new Task on the Workflow.
    self._dont_garbage_collect.append(task)

    return task
def __init__(self, database_url='sqlite:///:memory:', get_submit_args=default_get_submit_args,
             default_drm='local', default_drm_options=None, default_queue=None,
             default_time_req=None, default_max_attempts=1, flask_app=None,
             default_job_class=None):
    """
    Wire together the Flask app, the SQLAlchemy session and the default Task settings.

    :param str database_url: A `sqlalchemy database url
        <http://docs.sqlalchemy.org/en/latest/core/engines.html>`_; it is asserted to contain ``://``.
        ex: sqlite:///home/user/sqlite.db or mysql://user:pass@localhost/database_name or
        postgresql+psycopg2://user:pass@localhost/database_name
    :param callable get_submit_args: a function that returns arguments to be passed to the job submitter,
        like resource requirements or the queue to submit to.  See
        :func:`cosmos.api.default_get_submit_args` for details
    :param str default_drm: The Default DRM to use (ex 'local', 'lsf', or 'ge').  Its prefix up to the
        first ``:`` is asserted to be in ``DRM.get_drm_names()``.
    :param dict default_drm_options: Kept as ``self.default_drm_options``; ``{}`` when None.
    :param default_queue: Kept as ``self.default_queue``.
    :param default_time_req: Kept as ``self.default_time_req``.
    :param int default_max_attempts: Kept as ``self.default_max_attempts``.
    :param flask.Flask flask_app: A Flask application instance for the web interface.  The default
        behavior is to create one and give it a random secret key.
    :param default_job_class: Kept as ``self.default_job_class``.
    """
    default_drm_options = default_drm_options if default_drm_options is not None else {}

    # Avoid cyclical import dependencies
    from cosmos.job.drm.DRM_Base import DRM

    # Validate the DRM family (text before the first ':') and the URL shape.
    requested_drm = default_drm.partition(':')[0]
    assert requested_drm in DRM.get_drm_names(), 'unsupported drm: %s' % requested_drm
    assert '://' in database_url, 'Invalid database_url: %s' % database_url

    created_own_app = not flask_app
    self.flask_app = Flask(__name__) if created_own_app else flask_app
    if created_own_app:
        # Only an app created here gets a freshly generated secret key.
        self.flask_app.secret_key = os.urandom(24)

    self.get_submit_args = get_submit_args

    # NOTE: evaluated once at construction, so templates see this fixed timestamp.
    template_globals = self.flask_app.jinja_env.globals
    template_globals['time_now'] = datetime.now()

    from sqlalchemy import create_engine
    from sqlalchemy.orm import sessionmaker, scoped_session
    from sqlalchemy.ext.declarative import declarative_base

    engine = create_engine(database_url, convert_unicode=True)
    self.session = scoped_session(
        sessionmaker(bind=engine, autoflush=False, autocommit=False)
    )

    Base = declarative_base()
    Base.query = self.session.query_property()

    @self.flask_app.teardown_appcontext
    def shutdown_session(exception=None):
        # Called by Flask on app-context teardown; removes the scoped session.
        self.session.remove()

    # Per-Task defaults, stored unchanged.
    self.default_time_req = default_time_req
    self.default_max_attempts = default_max_attempts
    self.default_queue = default_queue
    self.default_job_class = default_job_class
    self.default_drm_options = default_drm_options
    self.default_drm = default_drm
def add_task(self, func, params=None, parents=None, stage_name=None, uid=None, drm=None,
             queue=None, must_succeed=True, time_req=None, core_req=None, mem_req=None,
             max_attempts=None, noop=False, job_class=None, drm_options=None):
    """
    Adds a new Task to the Workflow.  If the Task already exists (and was successful), return the successful
    Task stored in the database.

    :param callable func: A function which returns a string which will get converted to a shell script to be
        executed.  `func` will not get called until all of its dependencies have completed.
    :param dict params: Parameters to `func`.  Must be jsonable so that it can be stored in the database.  Any
        Dependency objects will get resolved into a string, and the Dependency.task will be added to this
        Task's parents.  The dict is updated in place with the resolved values.
    :param list[Tasks] parents: A list of dependent Tasks (a single Task is also accepted).
    :param str uid: A unique identifier for this Task, primarily used for skipping previously successful Tasks.
        If a Task with this stage_name and uid already exists in the database (and was successful), the
        database version will be returned and a new one will not be created.
    :param str stage_name: The name of the Stage to add this Task to.  Defaults to `func.__name__`.
    :param str drm: The drm to use for this Task (example 'local', 'ge' or 'drmaa:lsf').  Defaults to
        ``cosmos_app.default_drm``.
    :param job_class: The name of a job_class to submit to; defaults to ``cosmos_app.default_job_class``.
    :param queue: The name of a queue to submit to; defaults to ``cosmos_app.default_queue``.
    :param bool must_succeed: Default True.  If False, the Workflow will not fail if this Task does not
        succeed.  Dependent Jobs will not be executed.
    :param time_req: The time requirement; will set the Task.time_req attribute which is intended to be used
        by :func:`get_submit_args` to request resources.  Defaults to ``cosmos_app.default_time_req``.
    :param int core_req: Number of cpus required for this Task.  Can also be set in the `params` dict or the
        default value of the Task function signature, but this value takes precedence.
        Warning!  In future versions, this will be the only way to set it.
    :param int mem_req: Number of MB of RAM required for this Task.  Can also be set in the `params` dict or
        the default value of the Task function signature, but this value takes precedence.
        Warning!  In future versions, this will be the only way to set it.
    :param int max_attempts: The maximum number of times to retry a failed job.  Defaults to
        ``cosmos_app.default_max_attempts``.
    :param bool noop: Passed to the Task as its ``NOOP`` flag.
    :param dict drm_options: DRM options for this Task; defaults to ``cosmos_app.default_drm_options``.
    :raises AssertionError: if `uid` is missing or not a string.
    :raises ValueError: if an unsuccessful Task with the same uid is already in the Stage.
    :rtype: cosmos.api.Task
    """
    # Avoid cyclical import dependencies
    from cosmos.job.drm.DRM_Base import DRM
    from cosmos.models.Stage import Stage
    from cosmos import recursive_resolve_dependency

    # `basestring` only exists on Python 2; fall back to `str` elsewhere so
    # the uid check behaves the same on both major versions.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str

    # parents
    if parents is None:
        parents = []
    elif isinstance(parents, Task):
        parents = [parents]
    else:
        parents = list(parents)

    # params
    if params is None:
        params = dict()
    # Iterate over a snapshot: values are reassigned inside the loop.
    for k, v in list(params.items()):
        # decompose `Dependency` objects to values and parents
        new_val, parent_tasks = recursive_resolve_dependency(v)
        params[k] = new_val
        parents.extend(parent_tasks - set(parents))

    # uid -- raised explicitly (rather than via `assert`) so the checks
    # survive `python -O`; the exception type is unchanged.
    if uid is None:
        raise AssertionError('uid parameter must be specified')
    if not isinstance(uid, string_types):
        raise AssertionError('uid must be a string')

    if stage_name is None:
        stage_name = str(func.__name__)

    # Get the right Stage
    stage = only_one((s for s in self.stages if s.name == stage_name), None)
    if stage is None:
        stage = Stage(workflow=self, name=stage_name, status=StageStatus.no_attempt)
        self.session.add(stage)

    # Check if task is already in stage
    task = stage.get_task(uid, None)

    if task is not None:
        # if task is already in stage, but unsuccessful, raise an error (duplicate params) since unsuccessful
        # tasks were already removed on workflow load
        if not task.successful:
            # TODO check for duplicate params here?  would be a lot faster at Workflow.run
            raise ValueError(
                'Duplicate uid, you have added a Task to Stage %s with the uid (unique identifier) `%s` twice.  '
                'Task uids must be unique within the same Stage.' % (stage_name, uid))

        # If the user manually edited the dag and this a resume, parents might need to be re-added
        task.parents.extend(set(parents).difference(set(task.parents)))
        for p in parents:
            if p.stage not in stage.parents:
                stage.parents.append(p.stage)
        return task

    # Create Task
    sig = funcsigs.signature(func)

    def params_or_signature_default_or(name, default):
        # Precedence: explicit entry in `params`, then the default in
        # `func`'s signature, then the supplied fallback.
        if name in params:
            return params[name]
        if name in sig.parameters:
            param_default = sig.parameters[name].default
            if param_default is not funcsigs._empty:
                return param_default
        return default

    task = Task(
        stage=stage,
        params=params,
        parents=parents,
        uid=uid,
        drm=drm if drm is not None else self.cosmos_app.default_drm,
        job_class=job_class if job_class is not None else self.cosmos_app.default_job_class,
        queue=queue if queue is not None else self.cosmos_app.default_queue,
        must_succeed=must_succeed,
        core_req=core_req if core_req is not None else params_or_signature_default_or('core_req', 1),
        mem_req=mem_req if mem_req is not None else params_or_signature_default_or('mem_req', None),
        time_req=time_req if time_req is not None else self.cosmos_app.default_time_req,
        successful=False,
        max_attempts=max_attempts if max_attempts is not None else self.cosmos_app.default_max_attempts,
        attempt=1,
        NOOP=noop,
    )

    task.cmd_fxn = func

    task.drm_options = drm_options if drm_options is not None else self.cosmos_app.default_drm_options
    DRM.validate_drm_options(task.drm, task.drm_options)

    # Add Stage Dependencies
    for p in parents:
        if p.stage not in stage.parents:
            stage.parents.append(p.stage)

    # Hold a reference to the new Task on the Workflow.
    self.dont_garbage_collect.append(task)

    return task