Example #1
def test_other_does_not_contain_child():
    a = dags.DAG()
    b = dags.DAG()

    a_child = a.layer(name="a_child")

    assert a_child not in b
Example #2
def test_other_does_not_contain_child_even_if_same_name():
    a = dags.DAG()
    b = dags.DAG()

    a_child = a.layer(name="child")
    b_child = b.layer(name="child")

    assert a_child not in b
    assert b_child not in a
Example #3
def test_node_status_file_not_default():
    dag = dags.DAG(node_status_file=dags.NodeStatusFile(
        Path("node_status_file"), update_time=60, always_update=True))

    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)
    assert "NODE_STATUS_FILE node_status_file 60 ALWAYS-UPDATE" in lines
Example #4
def test_node_status_file_default():
    dag = dags.DAG(
        node_status_file=dags.NodeStatusFile(Path("node_status_file")))

    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)
    assert "NODE_STATUS_FILE node_status_file" in lines
Example #5
def dag(request):
    dag = dags.DAG()

    yield dag

    if request.session.testsfailed:
        print(dag.describe())
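This function reads as a pytest fixture whose decorator was dropped by the listing; a minimal sketch of how it would typically be registered and consumed (the test body below is hypothetical):

import pytest

import htcondor.dags as dags


@pytest.fixture
def dag(request):
    dag = dags.DAG()

    yield dag

    # Teardown: if anything in the session has failed so far,
    # dump the DAG structure to make the failure easier to diagnose.
    if request.session.testsfailed:
        print(dag.describe())


def test_layer_is_in_dag(dag):
    child = dag.layer(name="child")
    assert child in dag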
Example #6
def test_dagman_job_attributes_with_two_attrs():
    dag = dags.DAG(dagman_job_attributes={"foo": "bar", "wizard": 17})

    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)
    assert all(("SET_JOB_ATTR foo = bar" in lines, "SET_JOB_ATTR wizard = 17"
                in lines))
Example #7
def test_jobstate_log():
    logfile = Path("i_am_the_jobstate.log").absolute()

    dag = dags.DAG(jobstate_log=logfile)
    writer = DAGWriter(dag)

    lines = dagfile_lines(writer)
    assert "JOBSTATE_LOG {}".format(logfile.as_posix()) in lines
Example #8
def make_outer_dag(
    dest_dir,
    requirements,
    source_dir,
    test_mode,
    transfer_manifest_path,
    unique_id,
    working_dir,
):

    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    outer_dag = dags.DAG()

    outer_dag.layer(
        name="calc_work",
        submit_description=htcondor.Submit({
            "output": "calc_work.out",
            "error": "calc_work.err",
            "log": "calc_work.log",
            "arguments": "generate {} {}".format(
                source_dir, "--test-mode" if test_mode else ""),
            "should_transfer_files": "yes",
            **shared_submit_descriptors(unique_id, requirements),
        }),
        post=dags.Script(
            executable=THIS_FILE,
            arguments=[
                "write_subdag",
                source_dir,
                "source_manifest.txt",
                dest_dir,
                "destination_manifest.txt",
                transfer_manifest_path,
                "--requirements_file=requirements.txt"
                if requirements is not None else "",
                "--unique-id={}".format(unique_id)
                if unique_id is not None else "",
                "--test-mode" if test_mode else "",
            ],
        ),
    ).child_subdag(
        name="inner",
        dag_file=working_dir / "inner.dag",
        post=dags.Script(executable=THIS_FILE,
                         arguments=["analyze", transfer_manifest_path]),
    )

    return outer_dag
Example #9
def test_dot_config_not_default():
    dag = dags.DAG(dot_config=dags.DotConfig(
        Path("dag.dot"),
        update=True,
        overwrite=False,
        include_file=Path("include-me.dot"),
    ))

    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)
    assert "DOT dag.dot UPDATE DONT-OVERWRITE INCLUDE include-me.dot" in lines
Example #10
def rescue_dag():
    sub = htcondor.Submit(
        dict(executable="/bin/echo", arguments="hi", request_memory="16MB", request_disk="1MB",)
    )

    dag = dags.DAG()

    a = dag.layer(name="a", submit_description=sub)
    b = a.child_layer(name="b", submit_description=sub)
    c = b.child_layer(
        name="c",
        submit_description=sub,
        abort=dags.DAGAbortCondition(node_exit_value=0, dag_return_value=1),
    )
    d = c.child_layer(name="d", submit_description=sub)

    return dag
Example #11
def create_dag_file(dag_graph, dag_dir_name, information_dict):
    """

    :param dag_graph: a DAG networkx graph representing the dependencies between the different jobs,
                        where a job is specified by 'job_name'
    :param dag_dir_name: Directory for the dag. Will be overwritten.
    :param information_dict: a dictionary of dictionaries: has a key for each 'job_name'.
            in information_dict['job_name'] there are keys for
                the python script path (py_script_path)
                the batch parameters (batch_parameters). Will be set to [] by default
                'kargs_dict' is a dictionary holding all parameters for running a job as specified in send_job scripts.

    :return:
    """
    nodes = list(nx.topological_sort(dag_graph))
    # layers = []
    dag = dags.DAG()
    for job_name in nodes:
        job_submit = create_job_submit_format_from_python_script(
            information_dict[job_name]['py_script_path'], job_name,
            **information_dict[job_name]['kargs_dict'])
        if 'batch_parameters' not in information_dict[job_name].keys():
            information_dict[job_name]['batch_parameters'] = [{}]
        layer = dag.layer(name=job_name,
                          submit_description=job_submit,
                          vars=information_dict[job_name]['batch_parameters'])
        # layers.append(layer)
        parents = list(dag_graph.predecessors(job_name))
        if parents:
            for parent in parents:
                layer.add_parents(dag.glob(parent))

    print(dag.describe())
    # Start from a clean DAG directory: remove any previous contents, then recreate it.
    shutil.rmtree(exec_dir + dag_dir_name, ignore_errors=True)
    os.mkdir(exec_dir + dag_dir_name)
    dag_file = dags.write_dag(dag, exec_dir + dag_dir_name)
    return dag_file
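A minimal usage sketch for `create_dag_file`, assuming `networkx` is imported as `nx` and that `exec_dir` and `create_job_submit_format_from_python_script` are defined elsewhere in the module; the job names, script paths, and parameters below are hypothetical:

import networkx as nx

# Hypothetical two-job pipeline: "prepare" must finish before "train" starts.
dag_graph = nx.DiGraph()
dag_graph.add_edge("prepare", "train")

information_dict = {
    "prepare": {
        "py_script_path": "scripts/prepare.py",
        "kargs_dict": {},                      # extra send_job parameters
        "batch_parameters": [{"chunk": "0"}],  # one VARS entry per chunk
    },
    "train": {
        "py_script_path": "scripts/train.py",
        "kargs_dict": {},
        # 'batch_parameters' omitted: defaults to [{}] (a single job)
    },
}

dag_file = create_dag_file(dag_graph, "my_pipeline", information_dict)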
Example #12
#!/usr/bin/env python

from pathlib import Path

import htcondor
from htcondor import dags

# We will split words.txt into five chunks.
NUM_CHUNKS = 5

# Start by creating the DAG object itself.
# This object "holds" the DAG information.
# Meta-information like DAGMan configuration, the location of the node status
# file, etc., lives on this object.
# Its methods are used to create node layers and possibly subDAGs.
diamond = dags.DAG()

# This is the "split" step.
# It has no parent layer, so it is a root layer of the DAG.
# Root layers are created from the DAG object itself.
split_words = diamond.layer(
    name="split_words",
    submit_description=htcondor.Submit({
        "executable": "split_words.py",
        "arguments": str(NUM_CHUNKS),
        "transfer_input_files": "words.txt",
        "output": "split_words.out",
        "error": "split_words.err",
    }),
)
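The script above stops after the root layer. A sketch of how the rest of the diamond might be completed follows: the `count_words` fan-out mirrors Example #21, while the `combine_counts.py` merge step and its file names are hypothetical; `child_layer` and `write_dag` are the same API used elsewhere in these examples.

# Fan out: one "count" job per chunk, attached as a child of the split layer.
count_words = split_words.child_layer(
    name="count_words",
    submit_description=htcondor.Submit({
        "executable": "count_words.py",
        "arguments": "$(word_set)",
        "transfer_input_files": "words_$(word_set).txt",
        "output": "count_words_$(word_set).out",
        "error": "count_words_$(word_set).err",
    }),
    vars=[{"word_set": str(n)} for n in range(NUM_CHUNKS)],
)

# Fan back in: a single (hypothetical) combine step that depends on every count job.
combine_counts = count_words.child_layer(
    name="combine_counts",
    submit_description=htcondor.Submit({
        "executable": "combine_counts.py",
        # Hypothetical per-chunk output names produced by the count step.
        "transfer_input_files": ", ".join(
            "counts_{}.txt".format(n) for n in range(NUM_CHUNKS)),
        "output": "combine_counts.out",
        "error": "combine_counts.err",
    }),
)

# Write the DAG input file next to this script.
dags.write_dag(diamond, Path(__file__).parent)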
Example #13
def make_outer_dag(
    direction: TransferDirection,
    local_dir: Path,
    remote_dir: Path,
    working_dir: Path,
    requirements: Optional[str],
    unique_id: Optional[str],
    test_mode: bool,
):
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    outer_dag = dags.DAG()

    transfer_manifest_path = local_dir / TRANSFER_MANIFEST_FILE_NAME

    if requirements:
        write_requirements_file(working_dir, requirements)

    # copy this script into the working dir for all further use
    executable = working_dir / THIS_FILE.name
    shutil.copy2(str(THIS_FILE), str(executable))

    outer_dag.layer(
        name="make_remote_file_manifest",
        submit_description=htcondor.Submit({
            "output": "make_remote_file_manifest.out",
            "error": "make_remote_file_manifest.err",
            "log": "make_remote_file_manifest.log",
            "arguments": "{} {} {}".format(
                Commands.MAKE_REMOTE_FILE_MANIFEST,
                remote_dir,
                "--test-mode" if test_mode else "",
            ),
            "should_transfer_files": "yes",
            **shared_submit_descriptors(
                executable=executable,
                unique_id=unique_id,
                requirements=requirements,
            ),
        }),
        post=dags.Script(
            executable=executable,
            arguments=[
                Commands.WRITE_INNER_DAG,
                direction,
                remote_dir,
                REMOTE_MANIFEST_FILE_NAME,
                local_dir,
                "--requirements_file={}".format(REQUIREMENTS_FILE_NAME)
                if requirements is not None else "",
                "--unique_id={}".format(unique_id)
                if unique_id is not None else "",
                "--test-mode" if test_mode else "",
            ],
        ),
    ).child_subdag(
        name="inner",
        dag_file=working_dir / INNER_DAG_NAME,
        post=dags.Script(
            executable=executable,
            arguments=[
                Commands.FINALIZE_TRANSFER_MANIFEST, transfer_manifest_path
            ],
        ),
    )

    logging.info("Outer DAG shape:\n{}".format(outer_dag.describe()))

    return outer_dag
Example #14
def test_config_file_gets_written_if_config_given(dag_dir):
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})
    dags.write_dag(dag, dag_dir)

    assert (dag_dir / dags.CONFIG_FILE_NAME).exists()
Example #15
def test_config_command_in_dagfile_if_config_given():
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})
    writer = DAGWriter(dag)

    lines = dagfile_lines(writer)
    assert "CONFIG {}".format(dags.CONFIG_FILE_NAME) in lines
Example #16
def make_inner_dag(
    requirements: Optional[str],
    xfer_cmd_info: T_CMD_INFO,
    verify_cmd_info: T_CMD_INFO,
    unique_id: Optional[str] = None,
    test_mode: bool = False,
):

    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    inner_dag = dags.DAG(
        max_jobs_by_category={"TRANSFER_JOBS": 1} if test_mode else None)

    inner_dag.layer(
        name="xfer",
        submit_description=htcondor.Submit({
            "output": "$(src_file_noslash).out",
            "error": "$(src_file_noslash).err",
            "log": "xfer_file.log",
            "arguments": classad.quote("exec '$(src_file)'"),
            "should_transfer_files": "yes",
            "transfer_output_files": "{}, metadata".format(SANDBOX_FILE_NAME),
            "transfer_output_remaps": classad.quote(
                "{} = $(dest); metadata = $(src_file_noslash).metadata".format(
                    SANDBOX_FILE_NAME)),
            **shared_submit_descriptors(unique_id, requirements),
        }),
        vars=xfer_cmd_info,
        post=dags.Script(
            executable=THIS_FILE,
            arguments=[
                "verify", "--json=xfer_commands.json", "--fileid", "$JOB"
            ],
        ),
    )

    inner_dag.layer(
        name="verify",
        submit_description=htcondor.Submit({
            "output": "$(src_file_noslash).out",
            "error": "$(src_file_noslash).err",
            "log": "verify_file.log",
            "arguments": classad.quote("verify_remote '$(src_file)'"),
            "should_transfer_files": "yes",
            "transfer_output_files": "metadata",
            "transfer_output_remaps": classad.quote(
                "metadata = $(src_file_noslash).metadata"),
            **shared_submit_descriptors(unique_id, requirements),
        }),
        vars=verify_cmd_info,
        post=dags.Script(
            executable=THIS_FILE,
            arguments=[
                "verify", "--json=verify_commands.json", "--fileid", "$JOB"
            ],
        ),
    )

    return inner_dag
Example #17
def test_config_file_has_right_contents(dag_dir):
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})
    dags.write_dag(dag, dag_dir)

    assert ("DAGMAN_MAX_JOBS_IDLE = 10"
            in (dag_dir / dags.CONFIG_FILE_NAME).read_text().splitlines())
Example #18
def test_dagman_job_attributes_with_one_attr():
    dag = dags.DAG(dagman_job_attributes={"foo": "bar"})

    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)
    assert "SET_JOB_ATTR foo = bar" in lines
Example #19
def test_dot_config_default():
    dag = dags.DAG(dot_config=dags.DotConfig(Path("dag.dot")))

    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)
    assert "DOT dag.dot DONT-UPDATE OVERWRITE" in lines
Example #20
def test_max_jobs_per_category_with_two_categories():
    dag = dags.DAG(max_jobs_by_category={"foo": 5, "bar": 10})

    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)
    assert all(("CATEGORY foo 5" in lines, "CATEGORY bar 10" in lines))
Example #21
#!/usr/bin/env python

from pathlib import Path
import glob

import htcondor
from htcondor import dags

analysis_dag = dags.DAG()

# This is the "count words in chunk" step, which now lives in the sub-DAG.
# The split will have run by the time this code executes.
# Therefore, we can inspect the directory to find out how many chunks were created.

# determine the number of files in this directory that match the pattern
num_chunks = len(glob.glob("words_*.txt"))

count_words = analysis_dag.layer(
    name="count_words",
    submit_description=htcondor.Submit({
        "executable": "count_words.py",
        "arguments": "$(word_set)",
        "transfer_input_files": "words_$(word_set).txt",
        "output": "count_words_$(word_set).out",
        "error": "count_words_$(word_set).err",
    }),
    vars=[{"word_set": str(n)} for n in range(num_chunks)],
)
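The listing above stops before the sub-DAG is written out. A plausible closing step, assuming the file name matches the `analysis.dag` that the top-level DAG in Example #23 points its sub-DAG at:

# Write the sub-DAG input file where the outer DAG expects to find it.
this_dir = Path(__file__).parent
dags.write_dag(analysis_dag, this_dir, dag_file_name="analysis.dag")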
Example #22
def make_inner_dag(
    direction: TransferDirection,
    requirements: Optional[str],
    transfer_cmd_info: T_CMD_INFO,
    verify_cmd_info: T_CMD_INFO,
    unique_id: Optional[str] = None,
    test_mode: bool = False,
):
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    inner_dag = dags.DAG(
        max_jobs_by_category={"TRANSFER_JOBS": 1} if test_mode else None)

    tof = [METADATA_FILE_NAME]
    tor = {METADATA_FILE_NAME: "$(flattened_name).metadata"}

    pull_tof = [SANDBOX_FILE_NAME]
    pull_tor = {SANDBOX_FILE_NAME: "$(flattened_name)"}

    shared_descriptors = shared_submit_descriptors(unique_id=unique_id,
                                                   requirements=requirements)

    inner_dag.layer(
        name=direction,
        submit_description=htcondor.Submit({
            "output": "$(flattened_name).out",
            "error": "$(flattened_name).err",
            "log": "transfer_file.log",
            "arguments": classad.quote("{} '$(remote_file)'".format(
                DIRECTION_TO_COMMAND[direction])),
            "should_transfer_files": "yes",
            "transfer_input_files":
                "$(local_file)" if direction is TransferDirection.PUSH else "",
            "transfer_output_files": ", ".join(
                tof + (pull_tof if direction is TransferDirection.PULL else [])),
            "transfer_output_remaps": classad.quote(" ; ".join(
                "{} = {}".format(k, v) for k, v in {
                    **tor,
                    # The sandbox remap only applies when pulling.
                    **(pull_tor if direction is TransferDirection.PULL else {}),
                }.items())),
            **shared_descriptors,
        }),
        vars=transfer_cmd_info,
        post=dags.Script(
            executable=THIS_FILE,
            arguments=[
                Commands.POST_TRANSFER,
                "--cmd-info",
                TRANSFER_COMMANDS_FILE_NAME,
                "--key",
                "$JOB",
            ],
        ),
    )

    inner_dag.layer(
        name="verify",
        submit_description=htcondor.Submit({
            "output": "$(flattened_name).out",
            "error": "$(flattened_name).err",
            "log": "verify_file.log",
            "arguments": classad.quote("{} '$(remote_file)'".format(
                Commands.GET_REMOTE_METADATA)),
            "should_transfer_files": "yes",
            "transfer_output_files": ", ".join(tof),
            "transfer_output_remaps": classad.quote(" ; ".join(
                "{} = {}".format(k, v) for k, v in tor.items())),
            **shared_descriptors,
        }),
        vars=verify_cmd_info,
        post=dags.Script(
            executable=THIS_FILE,
            arguments=[
                Commands.POST_TRANSFER,
                "--cmd-info",
                VERIFY_COMMANDS_FILE_NAME,
                "--key",
                "$JOB",
                "--only-verify",
            ],
        ),
    )

    logging.info("Inner DAG shape:\n{}".format(inner_dag.describe()))

    return inner_dag
Example #23
#!/usr/bin/env python

from pathlib import Path

import htcondor
from htcondor import dags

top_layer_dag = dags.DAG()

# This is the "split" step. It stays in the top-level DAG.
# Note that split_words.py no longer takes arguments. It determines the number
# of chunks itself.
split_words = top_layer_dag.layer(
    name="split_words",
    submit_description=htcondor.Submit({
        "executable": "split_words.py",
        "transfer_input_files": "words.txt",
        "output": "split_words.out",
        "error": "split_words.err",
    }),
    post=dags.Script(executable="make_analysis_dag.py"),
)

analysis_subdag = split_words.child_subdag(name="analysis",
                                           dag_file="analysis.dag")

# Write out the DAG.
# Now that we're going to have two DAG input files in this directory, we need
# to give them unique names.
this_dir = Path(__file__).parent
dags.write_dag(top_layer_dag, this_dir, dag_file_name="top_level.dag")
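Not part of the original example, but a common next step: the written DAG file can be handed to DAGMan from Python via `htcondor.Submit.from_dag`. A hedged sketch, leaving the actual submission commented out:

# Build a submit description for running DAGMan on the file just written,
# then hand it to the local schedd (only meaningful on a submit host).
dag_submit = htcondor.Submit.from_dag(str(this_dir / "top_level.dag"))
schedd = htcondor.Schedd()
# submit_result = schedd.submit(dag_submit)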
Example #24
def test_max_jobs_per_category_with_one_category():
    dag = dags.DAG(max_jobs_by_category={"foo": 5})

    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)
    assert "CATEGORY foo 5" in lines