def test_parse_arguments_for_init_with_relative_path_for_both_params_if_environment_is_not_set(monkeypatch):
    temp_home = tempfile.mkdtemp()
    monkeypatch.delenv("AIRFLOW_HOME", raising=False)
    monkeypatch.delenv("AIRFLOW_CONFIG", raising=False)
    monkeypatch.setattr(path, "expanduser", lambda x: x.replace("~", temp_home))

    input_airflow_home = "./not_default_anymore/airflow"
    input_airflow_cfg = "./not_default_anymore/airflow/airflow.cfg"
    control_airflow_home = get_absolute_path(input_airflow_home, temp_home)
    control_airflow_cfg = get_absolute_path(input_airflow_cfg, temp_home)

    input_args = [
        "init",
        "--home", input_airflow_home,
        "--config", input_airflow_cfg
    ]
    result_args = parse_arguments(input_args, temp_home)
    rmtree(temp_home)

    assert result_args.home == control_airflow_home, \
        "Failed to parse --home"
    assert result_args.config == control_airflow_cfg, \
        "Failed to parse --config"
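
# A minimal sketch of the get_absolute_path helper the test above depends on
# (an assumption for illustration only; the real helper is defined elsewhere
# in the package): it resolves a relative location against a base directory
# and leaves absolute paths untouched.
#
#   def get_absolute_path(location, cwd=None):
#       cwd = os.getcwd() if cwd is None else cwd
#       return location if os.path.isabs(location) \
#           else os.path.normpath(os.path.join(cwd, location))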
def load_test_suite(args):
    """
    Loads tests from the provided --suite file.
    Selects tests based on the indices from --range.

    Updates tool locations to be absolute. Loads jobs and updates all
    input file locations to be absolute too (unless the --relative
    parameter was set). Adds "outputs_folder" to the job, as well as
    the "index" to indicate which test case was used. Adds run_id's
    as keys for easy access and proper test identification when
    receiving results. In case we failed to load a test case, sets
    "finished" to True and writes the reason to "error".
    """

    suite_data = load_yaml(args.suite)
    suite_dir = os.path.dirname(args.suite)
    suite_data_filtered = OrderedDict()        # use OrderedDict just to keep it similar to suite_data
    for i in args.range:
        test_data = suite_data[i]
        run_id = str(uuid.uuid4())
        tool_location = get_absolute_path(test_data["tool"], suite_dir)
        logging.info(f"Read test case {i+1} to run {tool_location}")
        job_location = None
        job_data = {}
        if "job" in test_data:
            job_location = get_absolute_path(test_data["job"], suite_dir)
            try:
                if args.relative:              # skip relative path resolution as well as adding values from the workflow default inputs
                    job_data = load_yaml(job_location)
                else:
                    job_data = load_job(
                        workflow=tool_location,
                        job=job_location
                    )
            except Exception as ex:
                logging.error(f"Failed to load test case {i+1} to run {tool_location} with {job_location}: {ex}")
                test_data.update({
                    "error": "Failed to load test case",
                    "finished": True
                })
        job_data["outputs_folder"] = get_dir(os.path.join(args.tmp, run_id))

        test_data.update({
            "job": job_data,                   # already parsed, includes "outputs_folder"
            "tool": tool_location,
            "dag_id": get_rootname(test_data["tool"]),
            "index": i+1,                      # to know test case number, 1-based to correspond to --range
            "finished": test_data.get("finished", False)  # to indicate whether the test was finished or not
        })
        if not test_data["finished"]:          # don't report success for a test case that failed to load
            logging.info(f"Successfully loaded test case {i+1} to run {tool_location} with {job_location} as {run_id}")
        suite_data_filtered[run_id] = test_data  # use "run_id" as a key for fast access when checking results
    return suite_data_filtered
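
# Hedged example of the --suite YAML this function expects (layout inferred
# from the keys accessed above; the file names are made up): "tool" is
# required, "job" is optional, and both are resolved relative to the suite
# file's directory.
#
#   - tool: workflows/bam-bedgraph-bigwig.cwl
#     job: jobs/bam-bedgraph-bigwig.json
#   - tool: workflows/super-enhancer.cwl      # no "job": job_data stays {}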
def execute(self, context):
    """
    Loads the job object from the context. Sets "tmp_folder" and
    "outputs_folder" if they have not been set in the job before.
    In case "tmp_folder" and/or "outputs_folder" were read from the
    job and are relative, resolves them relative to the "tmp_folder"
    and/or "outputs_folder" from "cwl_args". Dumps step outputs as a
    JSON file into "tmp_folder". Returns the report file location,
    which is written to XCom.
    """

    setup_cwl_logger(context["ti"])
    post_status(context)

    # for easy access
    dag_id = context["dag"].dag_id
    workflow = context["dag"].workflow
    run_id = context["run_id"].replace(":", "_").replace("+", "_")  # to make it dumpable by json
    cwl_args = context["dag"].default_args["cwl"]

    # Load job from dag_run configuration. Set defaults from "workflow". Fail on missing input files
    job_data = load_job(
        workflow=workflow,
        job=context["dag_run"].conf["job"],
        cwl_args=cwl_args
    )

    job_data["tmp_folder"] = get_dir(
        get_absolute_path(
            job_data.get(
                "tmp_folder",
                mkdtemp(dir=cwl_args["tmp_folder"], prefix=dag_id + "_" + run_id + "_")
            ),
            cwl_args["tmp_folder"]
        )
    )

    job_data["outputs_folder"] = get_dir(
        get_absolute_path(
            job_data.get(
                "outputs_folder",
                os.path.join(cwl_args["outputs_folder"], dag_id, run_id)
            ),
            cwl_args["outputs_folder"]
        )
    )

    _, _, _, step_report = get_temp_folders(
        task_id=self.task_id,
        job_data=job_data
    )

    dump_json(job_data, step_report)

    return step_report
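
# Hedged sketch (assumes standard Airflow XCom behavior): the value returned
# from execute() is pushed to XCom as "return_value", so a downstream task
# can locate the dumped job data like this (the task_id below is
# hypothetical):
#
#   report_location = context["ti"].xcom_pull(task_ids="CWLJobDispatcher")
#   job_data = load_yaml(report_location)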
def load_test_suite(args):
    """
    Loads tests from the provided --suite file.
    Selects tests based on the indices from --range.

    Updates tool locations to be absolute, loads jobs and updates all
    input file locations to be absolute too. Adds "outputs_folder" to
    the job, as well as the "index" to indicate which test case was
    used. Adds run_id's as keys for easy access and proper test
    identification when receiving results.
    """

    suite_data = load_yaml(args.suite)
    suite_dir = os.path.dirname(args.suite)
    suite_data_filtered = OrderedDict()        # use OrderedDict just to keep it similar to suite_data
    for i in args.range:
        test_data = suite_data[i]
        run_id = str(uuid.uuid4())
        tool_location = get_absolute_path(test_data["tool"], suite_dir)
        job_location = None
        job_data = {}
        if "job" in test_data:                 # "job" is optional; resolve and load it only when present
            job_location = get_absolute_path(test_data["job"], suite_dir)
            job_data = load_job(
                workflow=tool_location,
                job=job_location
            )
        job_data["outputs_folder"] = get_dir(os.path.join(args.tmp, run_id))

        test_data.update({
            "job": job_data,                   # already parsed, includes "outputs_folder"
            "tool": tool_location,
            "dag_id": get_rootname(test_data["tool"]),
            "index": i+1,                      # to know test case number, 1-based to correspond to --range
            "finished": False                  # to indicate whether the test was finished or not
        })
        logging.info(f"Loaded test case {i+1} to run {tool_location} with {job_location} as {run_id}")
        suite_data_filtered[run_id] = test_data  # use "run_id" as a key for fast access when checking results
    return suite_data_filtered
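
# Hedged usage sketch for this simpler variant (argument names taken from
# the attributes accessed above: suite, range, tmp; the suite path is made
# up):
#
#   args = argparse.Namespace(
#       suite="tests/conformance.yml",
#       range=[0, 1, 2],                  # 0-based indices into the suite list
#       tmp=tempfile.mkdtemp()
#   )
#   suite_data = load_test_suite(args)    # OrderedDict keyed by run_id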
def overwrite_deprecated_dag(dag_location, deprecated_dags_folder=None):
    """
    Loads DAG content from the "dag_location" file. Searches for the
    "dag.create()" command. If not found, we don't need to upgrade this
    DAG (it's either not from CWL-Airflow, or already in the new format).
    If "deprecated_dags_folder" is not None, copies the original DAG file
    there before upgrading it. After copying the deprecated DAG to the
    "deprecated_dags_folder", updates ".airflowignore" with the DAG file
    basename to exclude it from Airflow parsing. The upgraded DAG will
    always include the base64-encoded, gzip-compressed workflow content.
    In case "workflow_location" is a relative path, it will be resolved
    based on the dirname of "dag_location" (useful for tests only, because
    all our old DAGs always have an absolute path to the CWL file).
    This function doesn't backup or update the original CWL file.
    TODO: for more complicated DAG files that include "default_args",
    etc., this function should be extended accordingly.
    """

    with open(dag_location, "r+") as io_stream:  # open for both reading and writing
        dag_content = io_stream.read()

        if not re.search("dag\\.create\\(\\)", dag_content):  # do nothing if it wasn't an old-style DAG
            return

        workflow_location = get_absolute_path(   # resolve relative to dirname of "dag_location" (good for tests)
            re.search("(cwl_workflow\\s*=\\s*[\"|'])(.+?)([\"|'])", dag_content).group(2),
            os.path.dirname(dag_location)
        )

        dag_id = re.search("(dag_id\\s*=\\s*[\"|'])(.+?)([\"|'])", dag_content).group(2)

        compressed_workflow_content = get_compressed(
            fast_cwl_load(workflow_location)     # no "run" embedding or conversion to Workflow. If the DAG worked, the CWL should be ok too
        )

        if deprecated_dags_folder is not None:   # copy the old DAG to the folder with deprecated DAGs, update ".airflowignore"
            get_dir(deprecated_dags_folder)      # try to create "deprecated_dags_folder" if it doesn't exist
            shutil.copy(dag_location, deprecated_dags_folder)  # copy the DAG file
            ignore = os.path.join(deprecated_dags_folder, ".airflowignore")
            with open(ignore, "a") as output_stream:  # add the deprecated DAG to ".airflowignore"
                output_stream.write(os.path.basename(dag_location) + "\n")

        io_stream.seek(0)                        # rewind "dag_location" file to the beginning
        io_stream.write(DAG_TEMPLATE.format(compressed_workflow_content, dag_id))
        io_stream.truncate()                     # remove old data at the end of the file if anything became shorter than the original
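
# Hedged usage sketch: upgrading every old-style DAG file in a folder while
# keeping backups ("dags_folder" and "backup_folder" are hypothetical
# caller-provided paths; files without "dag.create()" are left untouched):
#
#   for location in glob.glob(os.path.join(dags_folder, "*.py")):
#       overwrite_deprecated_dag(location, deprecated_dags_folder=backup_folder)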
def get_normalized_args(args, skip_list=None, cwd=None):
    """
    Converts all relative path arguments to absolute ones, resolving
    them relative to "cwd" (or the current working directory if "cwd"
    is None). Skipped arguments and None values are returned unchanged.
    """

    cwd = getcwd() if cwd is None else cwd
    skip_list = [] if skip_list is None else skip_list

    normalized_args = {}
    for key, value in args.__dict__.items():
        if key not in skip_list and value is not None:
            if isinstance(value, list):        # assign the list directly so empty lists are kept too
                normalized_args[key] = [get_absolute_path(v, cwd) for v in value]
            else:
                normalized_args[key] = get_absolute_path(value, cwd)
        else:
            normalized_args[key] = value
    return argparse.Namespace(**normalized_args)
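
# Hedged usage sketch (the argument names here are hypothetical): path-like
# arguments become absolute, while entries in skip_list and None values pass
# through unchanged.
#
#   raw_args = argparse.Namespace(suite="./tests/conformance.yml",
#                                 range=[1, 2], embed=True)
#   normalized = get_normalized_args(raw_args, skip_list=["range", "embed"])
#   # normalized.suite is now absolute; "range" and "embed" are untouched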