def __init__(self, dag_id=None, cwl_workflow=None, default_args=None, schedule_interval=None, *args, **kwargs):
    """
    Initializes the DAG from a CWL workflow file.

    dag_id            - DAG identifier. When empty, it is derived from the
                        last path component of "cwl_workflow": the URL
                        fragment is dropped, ".cwl" is removed and every
                        remaining "." is replaced with "_dot_".
    cwl_workflow      - location of the CWL workflow. It is loaded through
                        "self.quick_load_cwl" and also stored in the
                        resulting default_args under "cwl_workflow".
    default_args      - optional dictionary of defaults. It is copied before
                        use, so neither the caller's dictionary nor a shared
                        default object is ever modified.
    schedule_interval - forwarded to the parent constructor unchanged.
    *args, **kwargs   - forwarded to the parent constructor.
                        "on_failure_callback" and "on_success_callback" are
                        set to dag_on_failure / dag_on_success unless the
                        caller has already provided them.

    Priority of the resulting default_args (highest first): values from
    "default_args", defaults built in this method, values returned by
    get_default_args().
    """
    self.top_task = None
    self.bottom_task = None
    self.cwlwf = self.quick_load_cwl(cwl_workflow)

    # keep callbacks provided by the caller, otherwise install ours
    kwargs.setdefault("on_failure_callback", dag_on_failure)
    kwargs.setdefault("on_success_callback", dag_on_success)

    # Always work on a private copy. The previous signature used a mutable
    # default ({}) and updated it in place, so values leaked between DAG
    # instances and the caller's dictionary was silently changed.
    default_args = {} if default_args is None else dict(default_args)

    # parameters that cannot be overwritten in default_args: they are
    # resolved through conf_get_default, and the value from default_args
    # is only passed to it as a fallback
    default_args.update({
        'singularity': conf_get_default('cwl', 'singularity', default_args.get('singularity', False)),
        'use_container': conf_get_default('cwl', 'use_container', default_args.get('use_container', True))
    })

    # "tmp_folder" and "basedir" are both taken from the same "tmp_folder" setting
    tmp_folder = conf_get_default('cwl', 'tmp_folder', '/tmp')

    init_default_args = {
        'start_date': days_ago(14),
        'email_on_failure': False,
        'email_on_retry': False,
        'end_date': None,

        'tmp_folder': tmp_folder,
        'basedir': tmp_folder,

        'no_match_user': conf_get_default('cwl', 'no_match_user', False),
        'task_retries': conf_get_default('cwl', 'retry', 1),

        'quiet': False,
        'strict': False,
        'on_error': 'continue',
        'skip_schemas': True,

        'cwl_workflow': cwl_workflow
    }

    # values from default_args win over the defaults built above
    init_default_args.update(default_args)

    # defaults from get_default_args() have the lowest priority
    merged_default_args = get_default_args()
    merged_default_args.update(init_default_args)

    if not dag_id:
        workflow_name = urllib.parse.urldefrag(cwl_workflow)[0].split("/")[-1]
        dag_id = workflow_name.replace(".cwl", "").replace(".", "_dot_")

    # Zero-argument super() resolves the parent through the defining class.
    # super(self.__class__, self) recursed forever as soon as this class
    # was subclassed.
    super().__init__(
        dag_id=dag_id,
        default_args=merged_default_args,
        schedule_interval=schedule_interval,
        *args, **kwargs)
def __init__(
        self,
        dag_id=None,
        cwl_workflow=None,
        default_args=None,
        schedule_interval=None,
        *args, **kwargs):
    """
    Initializes the DAG for a CWL workflow without loading it yet
    ("self.cwlwf" and "self.requirements" start as None).

    dag_id            - DAG identifier. When empty, it is derived from the
                        last path component of the workflow location: the
                        URL fragment is dropped, ".cwl" is removed and every
                        remaining "." is replaced with "_dot_".
    cwl_workflow      - location of the CWL workflow. When empty, it is taken
                        from default_args["cwl_workflow"].
    default_args      - optional dictionary of defaults. It is only read and
                        never modified.
    schedule_interval - forwarded to the parent constructor unchanged.
    *args, **kwargs   - forwarded to the parent constructor.

    Raises KeyError if "cwl_workflow" is neither passed directly nor present
    in "default_args".

    Priority of the resulting default_args (highest first): values from
    "default_args", defaults built in this method, values returned by
    get_default_args().
    """
    self.top_task = None
    self.bottom_task = None
    self.cwlwf = None
    self.requirements = None

    # "tmp_folder" and "basedir" share the same configured location
    tmp_folder = conf_get_default('cwl', 'tmp_folder', '/tmp')

    _default_args = {
        'start_date': utcnow(),
        'email_on_failure': False,
        'email_on_retry': False,
        'end_date': None,

        'tmp_folder': tmp_folder,
        'basedir': tmp_folder,

        'print_deps': False,
        'print_pre': False,
        'print_rdf': False,
        'print_dot': False,
        'relative_deps': False,
        'use_container': True,
        'rm_container': True,
        'enable_pull': True,
        'preserve_environment': ["PATH"],
        'preserve_entire_environment': False,
        'print_input_deps': False,
        'cachedir': None,
        'rm_tmpdir': True,
        'move_outputs': 'move',
        'eval_timeout': 20,
        'quiet': False,
        'version': False,
        'enable_dev': False,
        'enable_ext': False,
        'strict': False,
        'rdf_serializer': None,
        'tool_help': False,
        'pack': False,
        'on_error': 'continue',
        'relax_path_checks': False,
        'validate': False,
        'compute_checksum': True,
        'skip_schemas': True,
        'no_match_user': False,
    }

    # values provided by the caller win over the defaults built above
    _default_args.update(default_args if default_args else {})

    # defaults from get_default_args() have the lowest priority
    _d = get_default_args()
    _d.update(_default_args)

    # fall back to the workflow location stored in default_args;
    # a missing key intentionally surfaces as KeyError
    self.cwl_workflow = cwl_workflow if cwl_workflow else _default_args["cwl_workflow"]

    if dag_id:
        _dag_id = dag_id
    else:
        workflow_name = urllib.parse.urldefrag(self.cwl_workflow)[0].split("/")[-1]
        _dag_id = workflow_name.replace(".cwl", "").replace(".", "_dot_")

    # Zero-argument super() resolves the parent through the defining class.
    # super(self.__class__, self) recursed forever as soon as this class
    # was subclassed.
    super().__init__(
        dag_id=_dag_id,
        default_args=_d,
        schedule_interval=schedule_interval,
        *args, **kwargs)
def get_default_cwl_args(preset_cwl_args=None):
    """
    Builds the dictionary of arguments passed to cwltool's functions.

    The result starts from get_default_args(), is then updated with every
    field from "preset_cwl_args" (unknown fields are kept as well), and
    finally a fixed set of CWL-Airflow specific parameters is written on
    top. For most of them the value is resolved through conf_get with the
    preset value (or the CWL_* constant) as a fallback; "rm_tmpdir",
    "move_outputs" and "enable_dev" are taken from the presets or the CWL_*
    constants only. "preset_cwl_args" itself is never modified.
    """

    # work on a deep copy so the caller's object stays untouched
    presets = deepcopy(preset_cwl_args) if preset_cwl_args is not None else {}

    # cwltool defaults first, user presets on top of them
    cwl_args = get_default_args()
    cwl_args.update(presets)

    # Parameters below are resolved through conf_get("cwl", ...) with the
    # preset value, or the CWL_* constant when there is no preset, as a
    # fallback. The result is additionally passed through get_dir.
    folder_fallbacks = (
        ("tmp_folder", CWL_TMP_FOLDER),
        ("outputs_folder", CWL_OUTPUTS_FOLDER),  # for CWL-Airflow to store outputs if "outputs_folder" is not overwritten in job
        ("inputs_folder", CWL_INPUTS_FOLDER),    # for CWL-Airflow to resolve relative locations for input files if job was loaded from parsed object
        ("pickle_folder", CWL_PICKLE_FOLDER),    # for CWL-Airflow to store pickled workflows
    )

    # Same resolution as above, but the value is used as is.
    configurable_fallbacks = (
        ("use_container", CWL_USE_CONTAINER),    # execute jobs in docker containers
        ("no_match_user", CWL_NO_MATCH_USER),    # disables passing the current uid to "docker run --user"
        ("skip_schemas", CWL_SKIP_SCHEMAS),      # it looks like this doesn't influence anything in the latest cwltool
        ("strict", CWL_STRICT),
        ("quiet", CWL_QUIET),
    )

    # These are read from the presets only, never through conf_get.
    preset_only_fallbacks = (
        ("rm_tmpdir", CWL_RM_TMPDIR),            # even if we can set it in "preset_cwl_args" it's better not to change
        ("move_outputs", CWL_MOVE_OUTPUTS),      # even if we can set it in "preset_cwl_args" it's better not to change
        ("enable_dev", CWL_ENABLE_DEV),          # fails to run without it when creating workflow from tool. TODO: Ask Peter?
    )

    overrides = {}
    for name, fallback in folder_fallbacks:
        overrides[name] = get_dir(conf_get("cwl", name, presets.get(name, fallback)))
    for name, fallback in configurable_fallbacks:
        overrides[name] = conf_get("cwl", name, presets.get(name, fallback))
    for name, fallback in preset_only_fallbacks:
        overrides[name] = presets.get(name, fallback)

    cwl_args.update(overrides)
    return cwl_args
def load_cwl(cwl_file):
    """
    Loads "cwl_file" through load.load_tool and returns its result.

    Before loading, "load.loaders" is reset to an empty dictionary. The
    loading context is created from get_default_args() and configured to
    use default_make_tool for building tool objects and tool_resolver for
    resolving references.
    """

    # start from an empty "loaders" dictionary on every call
    load.loaders = {}

    context = cwltool.context.LoadingContext(get_default_args())
    context.construct_tool_object = default_make_tool
    context.resolver = tool_resolver

    tool = load.load_tool(cwl_file, context)
    return tool