Example #1
def load_job(workflow, job, cwl_args=None, cwd=None):
    """
    Tries to load a JSON object from "job". If that fails, assumes that
    "job" has already been parsed into an object. Initializes the loaded
    "job_data" based on the "workflow" (mostly for setting defaults from
    the workflow inputs; never fails). "cwl_args" can be used to update
    parameters for the loading and runtime contexts.

    If "job" was a file, resolves relative paths based on the job file
    location. If "job" was already parsed into an object, resolves
    relative paths based on "cwd". If "cwd" was None, uses the
    "inputs_folder" value from "cwl_args" or its default value returned
    from the "get_default_cwl_args" function.

    Checking links after relative paths are resolved is disabled
    ("checklinks" is set to False in both places). This prevents schema
    salad from raising an exception in those cases when an input file
    will be created from the provided content during workflow execution.

    Always returns a CommentedMap.
    """

    cwl_args = {} if cwl_args is None else cwl_args

    default_cwl_args = get_default_cwl_args(cwl_args)
    cwd = default_cwl_args["inputs_folder"] if cwd is None else cwd

    loading_context = setup_loadingContext(
        LoadingContext(default_cwl_args), RuntimeContext(default_cwl_args),
        argparse.Namespace(**default_cwl_args))

    job_copy = deepcopy(job)

    try:
        job_data, _ = loading_context.loader.resolve_ref(job_copy,
                                                         checklinks=False)
    except (FileNotFoundError, SchemaSaladException):
        job_data = load_yaml(json.dumps(job_copy))
        job_data["id"] = file_uri(cwd) + "/"
        job_data, _ = loading_context.loader.resolve_all(
            job_data, job_data["id"], checklinks=False)

    initialized_job_data = init_job_order(
        job_order_object=job_data,
        args=argparse.Namespace(**default_cwl_args),
        process=slow_cwl_load(workflow=workflow, cwl_args=default_cwl_args),
        loader=loading_context.loader,
        stdout=os.devnull)

    return initialized_job_data
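
Below is a hypothetical usage sketch of load_job (the workflow and job paths are made up for illustration): "job" may be either a path to a JSON/YAML file or an already-parsed object, and both calls are expected to return an initialized CommentedMap.

job_from_file = load_job(
    workflow="workflows/bam-bedgraph-bigwig.cwl",    # hypothetical path
    job="jobs/bam-bedgraph-bigwig.json")             # relative paths resolved against the job file location

job_from_object = load_job(
    workflow="workflows/bam-bedgraph-bigwig.cwl",
    job={"bam_file": {"class": "File", "location": "input.bam"}},
    cwd="/tmp/inputs")                               # relative paths inside the object resolved against "cwd"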
Example #2
def collect_reports(context, task_ids=None):
    """
    Collects reports from "context" for the specified "task_ids".
    If "task_ids" is not set, uses all tasks from the DAG. Loads
    and merges data from the reports.
    """

    task_ids = context["dag"].task_ids if task_ids is None else task_ids

    job_data = {}
    for report_location in context["ti"].xcom_pull(task_ids=task_ids):
        if report_location is not None:
            job_data = merge(job_data, load_yaml(report_location))

    return job_data
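
The "merge" helper is not defined in this snippet; the sketch below is an assumption about its behavior (a recursive dictionary merge), not the project's actual implementation, but it matches how collect_reports uses it.

def merge(base, overrides):
    """Returns a new dict with "overrides" recursively merged into "base"."""
    result = dict(base)
    for key, value in overrides.items():
        if isinstance(result.get(key), dict) and isinstance(value, dict):
            result[key] = merge(result[key], value)   # merge nested dicts
        else:
            result[key] = value                       # override scalars and lists
    return result

assert merge({"a": {"x": 1}}, {"a": {"y": 2}, "b": 3}) == {"a": {"x": 1, "y": 2}, "b": 3}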
Example #3
def remove_tmp_data(dr):
    """
    Collects "tmp_folder" locations from the reports of all task
    instances of the provided DagRun and removes those folders.
    """
    logging.info(f"Searching tmp data for dag_id: {dr.dag_id}, run_id: {dr.run_id}")
    tmp_folder_set = set()
    for ti in dr.get_task_instances():
        logging.info(f"Task: {ti.task_id}, execution_date: {ti.execution_date}, pid: {ti.pid}, state: {ti.state}")
        try:
            logging.info(" - searching for tmp_folder in the report file")
            report_location = ti.xcom_pull(task_ids=ti.task_id)
            tmp_folder_set.add(load_yaml(report_location)["tmp_folder"])
        except Exception:
            logging.info(" - report file has already been deleted or is missing the tmp_folder field")
    for tmp_folder in tmp_folder_set:
        try:
            logging.info(f"Removing tmp data from {tmp_folder}")
            shutil.rmtree(tmp_folder)
        except Exception as ex:
            logging.error(f"Failed to delete {tmp_folder}\n {ex}")
Example #4
def load_test_suite(args):
    """
    Loads tests from the provided --suite file.
    Selects tests based on the indices from --range.

    Updates tool locations to be absolute, loads
    jobs and updates all input file locations to
    be absolute too. Adds "outputs_folder" to the job,
    as well as the "index" to indicate which test case
    was used.

    Adds run_ids as keys for easy access and proper
    test identification when receiving results.
    """

    suite_data = load_yaml(args.suite)
    suite_dir = os.path.dirname(args.suite)
    suite_data_filtered = OrderedDict()                                       # use OrderedDict just to keep it similar to suite_data
    for i in args.range:
        test_data = suite_data[i]
        run_id = str(uuid.uuid4())
        tool_location = get_absolute_path(test_data["tool"], suite_dir)
        if "job" in test_data:                                            # "job" is optional in a test case
            job_location = get_absolute_path(test_data["job"], suite_dir)
            job_data = load_job(
                workflow=tool_location,
                job=job_location
            )
        else:
            job_location = None
            job_data = {}
        job_data["outputs_folder"] = get_dir(os.path.join(args.tmp, run_id))

        test_data.update({
            "job":  job_data,                                                 # already parsed, includes "outputs_folder"
            "tool": tool_location,
            "dag_id": get_rootname(test_data["tool"]),
            "index": i+1,                                                     # to know test case number, 1-based to correspond to --range
            "finished": False                                                 # to indicate whether the test was finished or not
        })
        logging.info(f"Load test case {i+1} to run {tool_location} with {job_location} as {run_id}")
        suite_data_filtered[run_id] = test_data                               # use "run_id" as a key for fast access when checking results
    return suite_data_filtered
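
The "get_absolute_path" helper used above is not shown; a plausible stdlib-only sketch (an assumption about its behavior, not the project's code): absolute paths are kept as-is, relative ones are resolved against the suite file's directory.

import os

def get_absolute_path(p, cwd):
    return p if os.path.isabs(p) else os.path.normpath(os.path.join(cwd, p))

assert get_absolute_path("tools/my.cwl", "/data/suite") == "/data/suite/tools/my.cwl"
assert get_absolute_path("/abs/my.cwl", "/data/suite") == "/abs/my.cwl"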
Example #5
def assert_and_fix_args_for_test(args):
    """
    Asserts, fixes and sets parameters from the test parser.

    Tries to convert the --range argument to a list of indices.
    If --range wasn't set or the indices are not valid, sets it
    to include all tests from --suite. Duplicates are removed.
    This function should never fail.
    """

    suite_data = load_yaml(args.suite)
    suite_len = len(suite_data)
    try:
        selected_indices = []
        for interval in args.range.split(","):
            parsed_interval = [int(i) - 1 for i in interval.split("-")]   # switch to 0-based coordinates
            if len(parsed_interval) == 2:
                # safety check
                if parsed_interval[0] >= parsed_interval[1]: raise ValueError
                if parsed_interval[0] >= suite_len: raise ValueError
                if parsed_interval[1] >= suite_len: raise ValueError
                selected_indices.extend(
                    range(parsed_interval[0],
                          parsed_interval[1] + 1)  # need closed interval
                )
            elif len(parsed_interval) == 1:
                # safety check
                if parsed_interval[0] >= suite_len: raise ValueError
                selected_indices.append(parsed_interval[0])
            else:
                raise ValueError
    except (AttributeError, IndexError, ValueError):
        selected_indices = list(range(0, suite_len))

    args.range = sorted(set(selected_indices))  # convert to set to remove possible duplicates
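
The interval-parsing logic above, extracted into a self-contained sketch that can be tested in isolation (the function name is ours, not the project's):

def parse_range(range_str, suite_len):
    """Parses "1-3,5" into sorted, de-duplicated, 0-based indices."""
    try:
        selected = []
        for interval in range_str.split(","):
            bounds = [int(i) - 1 for i in interval.split("-")]    # 0-based
            if len(bounds) == 2:
                if not 0 <= bounds[0] < bounds[1] < suite_len:    # safety check
                    raise ValueError
                selected.extend(range(bounds[0], bounds[1] + 1))  # closed interval
            elif len(bounds) == 1:
                if not 0 <= bounds[0] < suite_len:                # safety check
                    raise ValueError
                selected.append(bounds[0])
            else:
                raise ValueError
        return sorted(set(selected))
    except (AttributeError, ValueError):
        return list(range(suite_len))                             # fall back to all tests

assert parse_range("1-3,5", 10) == [0, 1, 2, 4]
assert parse_range(None, 3) == [0, 1, 2]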
Example #6
def test_load_yaml_from_str_should_fail(location):
    with pytest.raises((YAMLError, ValueError)):
        load_yaml(location)
Example #7
def test_load_yaml_from_file_should_fail():
    location = path.join(DATA_FOLDER, "jobs", "dummy.json")
    with pytest.raises(ValueError):
        load_yaml(location)
Example #8
def test_load_yaml_from_file():
    location = path.join(DATA_FOLDER, "jobs", "bam-bedgraph-bigwig.json")
    data = load_yaml(location)
    assert "bam_file" in data, "Failed to load yaml (json)"