Example #1
def test_parse_arguments_for_init_with_relative_path_for_both_params_if_environment_is_not_set(
        monkeypatch):
    temp_home = tempfile.mkdtemp()
    # simulate a clean environment: no Airflow variables are set and "~"
    # expands into the temporary home directory
    monkeypatch.delenv("AIRFLOW_HOME", raising=False)
    monkeypatch.delenv("AIRFLOW_CONFIG", raising=False)
    monkeypatch.setattr(path, "expanduser",
                        lambda x: x.replace("~", temp_home))

    input_airflow_home = "./not_default_anymore/airflow"
    input_airflow_cfg = "./not_default_anymore/airflow/airflow.cfg"

    control_airflow_home = get_absolute_path(input_airflow_home, temp_home)
    control_airflow_cfg = get_absolute_path(input_airflow_cfg, temp_home)

    input_args = [
        "init", "--home", input_airflow_home, "--config", input_airflow_cfg
    ]

    result_args = parse_arguments(input_args, temp_home)
    rmtree(temp_home)  # clean up before the assertions so the folder is removed even on failure

    assert result_args.home == control_airflow_home, \
        "Failed to parse --home"
    assert result_args.config == control_airflow_cfg, \
        "Failed to parse --config"
Example #2
def load_test_suite(args):
    """
    Loads tests from the provided --suite file.
    Selects tests based on the indices from --range.
    
    Updates tools locations to be absolute. Loads
    jobs and updates all inputs files locations to
    be absolute (unless --relative parameter was set).
    Adds "outputs_folder" to the job, as well as the
    "index" to indicate which test case was used.

    Adds run_id's as keys for easy access and proper
    test identification when receiving results.

    In case we failed to load test case, sets "finished"
    to True and writes reason to "error".
    """

    suite_data = load_yaml(args.suite)
    suite_dir = os.path.dirname(args.suite)
    suite_data_filtered = OrderedDict()                                       # use OrderedDict just to keep it similar to suite_data
    for i in args.range:
        test_data = suite_data[i]
        run_id = str(uuid.uuid4())
        tool_location = get_absolute_path(test_data["tool"], suite_dir)
        logging.info(f"Read test case {i+1} to run {tool_location}")

        job_location = None
        job_data = {}

        if "job" in test_data:
            job_location = get_absolute_path(test_data["job"], suite_dir)
            try:
                if args.relative:                       # skips relative path resolutions as well as adding values from the workflow default inputs
                    job_data = load_yaml(job_location)
                else:
                    job_data = load_job(
                        workflow=tool_location,
                        job=job_location
                    )
            except Exception as ex:
                logging.error(f"Failed to load test case {i+1} to run {tool_location} with {job_location}: {ex}")
                test_data.update({
                    "error": "Failed to load test case",
                    "finished": True
                })

        job_data["outputs_folder"] = get_dir(os.path.join(args.tmp, run_id))

        test_data.update({
            "job":  job_data,                                                 # already parsed, includes "outputs_folder"
            "tool": tool_location,
            "dag_id": get_rootname(test_data["tool"]),
            "index": i+1,                                                     # to know test case number, 1-based to correspond to --range
            "finished": test_data.get("finished", False)                      # to indicate whether the test was finished or not
        })
        logging.info(f"Successfully loaded test case {i+1} to run {tool_location} with {job_location} as {run_id}")
        suite_data_filtered[run_id] = test_data                               # use "run_id" as a key for fast access when checking results
    return suite_data_filtered
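A hedged usage sketch for load_test_suite. The Namespace fields mirror the attributes the function reads (suite, range, tmp, relative); the file paths are illustrative, not taken from the real CLI:

import argparse

args = argparse.Namespace(
    suite="./tests/conformance_tests.yaml",  # hypothetical suite file
    range=[0, 1],                            # 0-based indices into the suite (the 1-based --range is assumed converted upstream)
    tmp="/tmp/cwl_airflow_tests",            # base folder for per-run "outputs_folder"
    relative=False                           # resolve job input paths to absolute
)

suite_data = load_test_suite(args)
for run_id, test in suite_data.items():
    print(run_id, test["dag_id"], test.get("error", "ok"))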
Example #3
    def execute(self, context):
        """
        Loads job Object from the context. Sets "tmp_folder" and "output_folder"
        if they have not been set before in the job. In case "tmp_folder" and/or
        "output_folder" were read from the job and are relative, resolves paths
        relative to the "tmp_folder" and/or "outputs_folder" from "cwl_args".
        Dumps step outputs as a json file into "tmp_folder". Writes to X-Com report
        file location.
        """

        setup_cwl_logger(context["ti"])
        post_status(context)

        # for easy access
        dag_id = context["dag"].dag_id
        workflow = context["dag"].workflow
        run_id = context["run_id"].replace(":", "_").replace(
            "+", "_")  # to make it dumpable by json
        cwl_args = context["dag"].default_args["cwl"]

        # Loads job from dag_run configuration. Sets defaults from "workflow". Fails on missing input files
        job_data = load_job(workflow=workflow,
                            job=context["dag_run"].conf["job"],
                            cwl_args=cwl_args)

        job_data["tmp_folder"] = get_dir(
            get_absolute_path(
                job_data.get(
                    "tmp_folder",
                    mkdtemp(dir=cwl_args["tmp_folder"],
                            prefix=dag_id + "_" + run_id + "_")),
                cwl_args["tmp_folder"]))

        job_data["outputs_folder"] = get_dir(
            get_absolute_path(
                job_data.get(
                    "outputs_folder",
                    os.path.join(cwl_args["outputs_folder"], dag_id, run_id)),
                cwl_args["outputs_folder"]))

        _, _, _, step_report = get_temp_folders(task_id=self.task_id,
                                                job_data=job_data)

        dump_json(job_data, step_report)

        return step_report
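To make the two resolution branches above concrete, a small worked illustration (folder values are made up; get_dir is assumed to create the directory and return its path):

cwl_args = {"tmp_folder": "/scratch/airflow/tmp",
            "outputs_folder": "/scratch/airflow/outputs"}

# Case 1: the job carries a relative "tmp_folder" -> resolved against cwl_args
job_data = {"tmp_folder": "run_1/tmp"}
get_absolute_path(job_data["tmp_folder"], cwl_args["tmp_folder"])
# -> "/scratch/airflow/tmp/run_1/tmp"

# Case 2: the job carries no "tmp_folder" -> a fresh one is generated first via
# mkdtemp(dir=cwl_args["tmp_folder"], prefix=f"{dag_id}_{run_id}_"), and
# get_absolute_path then leaves the already-absolute result untouched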
Example #4
def load_test_suite(args):
    """
    Loads tests from the provided --suite file.
    Selects tests based on the indices from --range.
    
    Updates tools locations to be absolute, loads
    jobs and updates all inputs files locations to
    be absolute too. Adds "outputs_folder" to the job,
    as well as the "index" to indicate which test case
    was used.

    Adds run_id's as keys for easy access and proper
    test identification when receiving results.
    """

    suite_data = load_yaml(args.suite)
    suite_dir = os.path.dirname(args.suite)
    suite_data_filtered = OrderedDict()                                       # use OrderedDict just to keep it similar to suite_data
    for i in args.range:
        test_data = suite_data[i]
        run_id = str(uuid.uuid4())
        tool_location = get_absolute_path(test_data["tool"], suite_dir)
        if "job" in test_data:  # guard the lookup: "job" may be absent from the test case
            job_location = get_absolute_path(test_data["job"], suite_dir)
            job_data = load_job(
                workflow=tool_location,
                job=job_location
            )
        else:
            job_location = None
            job_data = {}
        job_data["outputs_folder"] = get_dir(os.path.join(args.tmp, run_id))

        test_data.update({
            "job":  job_data,                                                 # already parsed, includes "outputs_folder"
            "tool": tool_location,
            "dag_id": get_rootname(test_data["tool"]),
            "index": i+1,                                                     # to know test case number, 1-based to correspond to --range
            "finished": False                                                 # to indicate whether the test was finished or not
        })
        logging.info(f"Load test case {i+1} to run {tool_location} with {job_location} as {run_id}")
        suite_data_filtered[run_id] = test_data                               # use "run_id" as a key for fast access when checking results
    return suite_data_filtered
Example #5
def overwrite_deprecated_dag(dag_location, deprecated_dags_folder=None):
    """
    Loads DAG content from "dag_location" file. Searches for "dag.create()" command.
    If not found, we don't need to upgrade this DAG (it's either not from CWL-Airflow,
    or already in a new format). If "deprecated_dags_folder" is not None, copies original
    DAG file there before DAG upgrading. After copying deprecated DAG to the
    "deprecated_dags_folder" updates ".airflowignore" with DAG file basename to exclude
    it from Airflow parsing. Upgraded DAG will always include base64 encoded gzip
    compressed workflow content. In case "workflow_location" is relative path, it will
    be resolved based on the dirname of "dag_location" (useful for tests only, because
    all our old DAGs always have absolute path to the CWL file). Function doesn't backup
    or update the original CWL file.
    TODO: in case more coplicated DAG files that include "default_args", etc, this function
    should be updated to the more complex one.
    """

    with open(dag_location, "r+") as io_stream:  # open for both reading and writing

        dag_content = io_stream.read()

        if not re.search("dag\\.create\\(\\)", dag_content):  # do nothing if it wasn't an old-style DAG
            return

        workflow_location = get_absolute_path(  # resolve relative to the dirname of "dag_location" (good for tests)
            re.search("(cwl_workflow\\s*=\\s*[\"'])(.+?)([\"'])",
                      dag_content).group(2), os.path.dirname(dag_location))

        dag_id = re.search("(dag_id\\s*=\\s*[\"'])(.+?)([\"'])",
                           dag_content).group(2)

        compressed_workflow_content = get_compressed(
            fast_cwl_load(workflow_location)  # no "run" embedding or conversion to Workflow; if the DAG worked, the CWL should be fine too
        )

        if deprecated_dags_folder is not None:  # stash the old DAG and hide it from Airflow
            get_dir(deprecated_dags_folder)  # create "deprecated_dags_folder" if it doesn't exist
            shutil.copy(dag_location, deprecated_dags_folder)  # copy DAG file
            ignore = os.path.join(deprecated_dags_folder, ".airflowignore")
            with open(ignore, "a") as output_stream:  # add the deprecated DAG to ".airflowignore"
                output_stream.write(os.path.basename(dag_location) + "\n")

        io_stream.seek(0)  # rewind "dag_location" file to the beginning
        io_stream.write(DAG_TEMPLATE.format(compressed_workflow_content, dag_id))
        io_stream.truncate()  # drop leftover old data if the new content is shorter than the original
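A hedged usage sketch for the upgrade helper above, walking a dags folder and stashing the originals; the paths are illustrative, not the real CLI behavior:

import glob
import os

dags_folder = os.path.expanduser("~/airflow/dags")  # assumed location
for dag_file in glob.glob(os.path.join(dags_folder, "*.py")):
    overwrite_deprecated_dag(
        dag_location=dag_file,
        deprecated_dags_folder=os.path.join(dags_folder, "deprecated_dags")
    )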
Example #6
def get_normalized_args(args, skip_list=None, cwd=None):
    """
    Converts all relative path arguments to absolute
    ones relatively to the cwd or current working directory.
    Skipped arguments and None will be returned unchanged.
    """

    cwd = getcwd() if cwd is None else cwd
    skip_list = [] if skip_list is None else skip_list

    normalized_args = {}
    for key, value in args.__dict__.items():
        if key not in skip_list and value is not None:
            if isinstance(value, list):
                for v in value:
                    normalized_args.setdefault(key, []).append(
                        get_absolute_path(v, cwd))
            else:
                normalized_args[key] = get_absolute_path(value, cwd)
        else:
            normalized_args[key] = value
    return argparse.Namespace(**normalized_args)
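A hedged usage sketch for get_normalized_args. The flags are made up, but they show why skip_list matters: non-path values (here the integer list behind --range) must not be fed to get_absolute_path:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--suite")                       # path-like, gets normalized
parser.add_argument("--tmp")                         # path-like, gets normalized
parser.add_argument("--range", nargs="*", type=int)  # not a path, must be skipped

args = parser.parse_args(
    ["--suite", "./suite.yaml", "--tmp", "./tmp", "--range", "1", "2"])

normalized = get_normalized_args(args, skip_list=["range"])
# normalized.suite and normalized.tmp are now absolute;
# normalized.range is returned unchanged as [1, 2]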