def test_other_does_not_contain_child():
    a = dags.DAG()
    b = dags.DAG()

    a_child = a.layer(name="a_child")

    assert a_child not in b
def test_other_does_not_contain_child_even_if_same_name():
    a = dags.DAG()
    b = dags.DAG()

    a_child = a.layer(name="child")
    b_child = b.layer(name="child")

    assert a_child not in b
    assert b_child not in a
def test_node_status_file_not_default():
    dag = dags.DAG(
        node_status_file=dags.NodeStatusFile(
            Path("node_status_file"), update_time=60, always_update=True
        )
    )
    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)

    assert "NODE_STATUS_FILE node_status_file 60 ALWAYS-UPDATE" in lines
def test_node_status_file_default():
    dag = dags.DAG(node_status_file=dags.NodeStatusFile(Path("node_status_file")))
    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)

    assert "NODE_STATUS_FILE node_status_file" in lines
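# These tests rely on a dagfile_lines(writer) helper that is defined elsewhere
# in the test suite. A minimal sketch of an equivalent helper, built only on
# the public htcondor.dags API (an assumption, not the suite's actual helper):

import tempfile
from pathlib import Path

from htcondor import dags


def dagfile_lines_via_write_dag(dag):
    # Write the DAG description file to a scratch directory and return its
    # lines, so assertions can check for individual DAG commands.
    with tempfile.TemporaryDirectory() as scratch:
        dag_file = dags.write_dag(dag, Path(scratch))
        return dag_file.read_text().splitlines()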
@pytest.fixture
def dag(request):
    dag = dags.DAG()

    yield dag

    # If any test failed, print the DAG structure to aid debugging.
    if request.session.testsfailed:
        print(dag.describe())
def test_dagman_job_attributes_with_two_attrs():
    dag = dags.DAG(dagman_job_attributes={"foo": "bar", "wizard": 17})
    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)

    assert "SET_JOB_ATTR foo = bar" in lines
    assert "SET_JOB_ATTR wizard = 17" in lines
def test_jobstate_log():
    logfile = Path("i_am_the_jobstate.log").absolute()
    dag = dags.DAG(jobstate_log=logfile)
    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)

    assert "JOBSTATE_LOG {}".format(logfile.as_posix()) in lines
def make_outer_dag(
    dest_dir,
    requirements,
    source_dir,
    test_mode,
    transfer_manifest_path,
    unique_id,
    working_dir,
):
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    outer_dag = dags.DAG()

    outer_dag.layer(
        name="calc_work",
        submit_description=htcondor.Submit(
            {
                "output": "calc_work.out",
                "error": "calc_work.err",
                "log": "calc_work.log",
                "arguments": "generate {} {}".format(
                    source_dir, "--test-mode" if test_mode else ""
                ),
                "should_transfer_files": "yes",
                **shared_submit_descriptors(unique_id, requirements),
            }
        ),
        post=dags.Script(
            executable=THIS_FILE,
            arguments=[
                "write_subdag",
                source_dir,
                "source_manifest.txt",
                dest_dir,
                "destination_manifest.txt",
                transfer_manifest_path,
                "--requirements_file=requirements.txt" if requirements is not None else "",
                "--unique-id={}".format(unique_id) if unique_id is not None else "",
                "--test-mode" if test_mode else "",
            ],
        ),
    ).child_subdag(
        name="inner",
        dag_file=working_dir / "inner.dag",
        post=dags.Script(
            executable=THIS_FILE,
            arguments=["analyze", transfer_manifest_path],
        ),
    )

    return outer_dag
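# shared_submit_descriptors is defined elsewhere in this script. A
# hypothetical sketch of the kind of mapping it might return (every key below
# is an assumption, not the tool's actual defaults):

import classad


def shared_submit_descriptors_sketch(unique_id, requirements):
    # Submit description keys shared by every node in the transfer DAGs;
    # optional pieces are merged in only when their arguments are given.
    return {
        "executable": str(THIS_FILE),
        "universe": "vanilla",
        "request_memory": "128MB",
        "request_disk": "1GB",
        **({"requirements": requirements} if requirements is not None else {}),
        **({"MY.UniqueID": classad.quote(unique_id)} if unique_id is not None else {}),
    }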
def test_dot_config_not_default():
    dag = dags.DAG(
        dot_config=dags.DotConfig(
            Path("dag.dot"),
            update=True,
            overwrite=False,
            include_file=Path("include-me.dot"),
        )
    )
    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)

    assert "DOT dag.dot UPDATE DONT-OVERWRITE INCLUDE include-me.dot" in lines
def rescue_dag():
    sub = htcondor.Submit(
        dict(
            executable="/bin/echo",
            arguments="hi",
            request_memory="16MB",
            request_disk="1MB",
        )
    )

    dag = dags.DAG()

    a = dag.layer(name="a", submit_description=sub)
    b = a.child_layer(name="b", submit_description=sub)
    # Node c aborts the DAG with return value 1 when its job exits with 0,
    # so the DAG fails partway through and DAGMan writes a rescue DAG.
    c = b.child_layer(
        name="c",
        submit_description=sub,
        abort=dags.DAGAbortCondition(node_exit_value=0, dag_return_value=1),
    )
    d = c.child_layer(name="d", submit_description=sub)

    return dag
def create_dag_file(dag_graph, dag_dir_name, information_dict):
    """
    :param dag_graph: a networkx DAG representing the dependencies between the
        different jobs, where each job is identified by its 'job_name'.
    :param dag_dir_name: directory for the DAG; will be overwritten.
    :param information_dict: a dictionary of dictionaries, with a key for each
        'job_name'. information_dict[job_name] holds the python script path
        ('py_script_path'), the batch parameters ('batch_parameters', set to
        [{}] by default), and 'kargs_dict', a dictionary of all parameters for
        running a job as specified in the send_job scripts.
    :return: the path to the written DAG file.
    """
    nodes = list(nx.topological_sort(dag_graph))
    dag = dags.DAG()
    for job_name in nodes:
        job_submit = create_job_submit_format_from_python_script(
            information_dict[job_name]["py_script_path"],
            job_name,
            **information_dict[job_name]["kargs_dict"],
        )
        if "batch_parameters" not in information_dict[job_name]:
            information_dict[job_name]["batch_parameters"] = [{}]
        layer = dag.layer(
            name=job_name,
            submit_description=job_submit,
            vars=information_dict[job_name]["batch_parameters"],
        )
        for parent in dag_graph.predecessors(job_name):
            layer.add_parents(dag.glob(parent))

    print(dag.describe())

    # Clear out any previous DAG directory before writing, then recreate it.
    dag_dir = exec_dir + dag_dir_name
    shutil.rmtree(dag_dir, ignore_errors=True)
    os.mkdir(dag_dir)
    dag_file = dags.write_dag(dag, dag_dir)
    return dag_file
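# create_job_submit_format_from_python_script comes from the send_job scripts
# and is not shown here. A hypothetical sketch of what it might build (the
# submit keys and file-naming scheme below are assumptions):

import sys

import htcondor


def create_job_submit_format_from_python_script_sketch(py_script_path, job_name, **kargs_dict):
    # Run the script under the current interpreter, routing its streams to
    # per-job files; extra keyword arguments become submit description keys.
    return htcondor.Submit(
        {
            "executable": sys.executable,
            "arguments": py_script_path,
            "output": "{}.out".format(job_name),
            "error": "{}.err".format(job_name),
            "log": "{}.log".format(job_name),
            **{k: str(v) for k, v in kargs_dict.items()},
        }
    )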
#!/usr/bin/env python

from pathlib import Path

import htcondor
from htcondor import dags

# We will split words.txt into five chunks.
NUM_CHUNKS = 5

# Start by creating the DAG object itself.
# This object "holds" the DAG information.
# Meta-information like DAGMan configuration, the location of the node status
# file, etc., lives on this object.
# Its methods are used to create node layers and possibly subDAGs.
diamond = dags.DAG()

# This is the "split" step.
# It has no parent layer, so it is a root layer of the DAG.
# Root layers are created from the DAG object itself.
split_words = diamond.layer(
    name="split_words",
    submit_description=htcondor.Submit(
        {
            "executable": "split_words.py",
            "arguments": str(NUM_CHUNKS),
            "transfer_input_files": "words.txt",
            "output": "split_words.out",
            "error": "split_words.err",
        }
    ),
)
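# The script presumably continues by fanning out over the chunks. A sketch of
# that next step as a child layer of split_words (assumed here, mirroring the
# count_words layer used in the sub-DAG version of this example):
count_words = split_words.child_layer(
    name="count_words",
    submit_description=htcondor.Submit(
        {
            "executable": "count_words.py",
            "arguments": "$(word_set)",
            "transfer_input_files": "words_$(word_set).txt",
            "output": "count_words_$(word_set).out",
            "error": "count_words_$(word_set).err",
        }
    ),
    # One node per chunk: $(word_set) takes the values 0 .. NUM_CHUNKS - 1.
    vars=[{"word_set": str(n)} for n in range(NUM_CHUNKS)],
)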
def make_outer_dag(
    direction: TransferDirection,
    local_dir: Path,
    remote_dir: Path,
    working_dir: Path,
    requirements: Optional[str],
    unique_id: Optional[str],
    test_mode: bool,
):
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    outer_dag = dags.DAG()

    transfer_manifest_path = local_dir / TRANSFER_MANIFEST_FILE_NAME

    if requirements:
        write_requirements_file(working_dir, requirements)

    # copy this script into the working dir for all further use
    executable = working_dir / THIS_FILE.name
    shutil.copy2(str(THIS_FILE), str(executable))

    outer_dag.layer(
        name="make_remote_file_manifest",
        submit_description=htcondor.Submit(
            {
                "output": "make_remote_file_manifest.out",
                "error": "make_remote_file_manifest.err",
                "log": "make_remote_file_manifest.log",
                "arguments": "{} {} {}".format(
                    Commands.MAKE_REMOTE_FILE_MANIFEST,
                    remote_dir,
                    "--test-mode" if test_mode else "",
                ),
                "should_transfer_files": "yes",
                **shared_submit_descriptors(
                    executable=executable,
                    unique_id=unique_id,
                    requirements=requirements,
                ),
            }
        ),
        post=dags.Script(
            executable=executable,
            arguments=[
                Commands.WRITE_INNER_DAG,
                direction,
                remote_dir,
                REMOTE_MANIFEST_FILE_NAME,
                local_dir,
                "--requirements_file={}".format(REQUIREMENTS_FILE_NAME)
                if requirements is not None
                else "",
                "--unique_id={}".format(unique_id) if unique_id is not None else "",
                "--test-mode" if test_mode else "",
            ],
        ),
    ).child_subdag(
        name="inner",
        dag_file=working_dir / INNER_DAG_NAME,
        post=dags.Script(
            executable=executable,
            arguments=[Commands.FINALIZE_TRANSFER_MANIFEST, transfer_manifest_path],
        ),
    )

    logging.info("Outer DAG shape:\n{}".format(outer_dag.describe()))

    return outer_dag
def test_config_file_gets_written_if_config_given(dag_dir):
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})

    dags.write_dag(dag, dag_dir)

    assert (dag_dir / dags.CONFIG_FILE_NAME).exists()
def test_config_command_in_dagfile_if_config_given():
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})
    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)

    assert "CONFIG {}".format(dags.CONFIG_FILE_NAME) in lines
def make_inner_dag(
    requirements: Optional[str],
    xfer_cmd_info: T_CMD_INFO,
    verify_cmd_info: T_CMD_INFO,
    unique_id: Optional[str] = None,
    test_mode: bool = False,
):
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    inner_dag = dags.DAG(
        max_jobs_by_category={"TRANSFER_JOBS": 1} if test_mode else None
    )

    inner_dag.layer(
        name="xfer",
        submit_description=htcondor.Submit(
            {
                "output": "$(src_file_noslash).out",
                "error": "$(src_file_noslash).err",
                "log": "xfer_file.log",
                "arguments": classad.quote("exec '$(src_file)'"),
                "should_transfer_files": "yes",
                "transfer_output_files": "{}, metadata".format(SANDBOX_FILE_NAME),
                "transfer_output_remaps": classad.quote(
                    "{} = $(dest); metadata = $(src_file_noslash).metadata".format(
                        SANDBOX_FILE_NAME
                    )
                ),
                **shared_submit_descriptors(unique_id, requirements),
            }
        ),
        vars=xfer_cmd_info,
        post=dags.Script(
            executable=THIS_FILE,
            arguments=["verify", "--json=xfer_commands.json", "--fileid", "$JOB"],
        ),
    )

    inner_dag.layer(
        name="verify",
        submit_description=htcondor.Submit(
            {
                "output": "$(src_file_noslash).out",
                "error": "$(src_file_noslash).err",
                "log": "verify_file.log",
                "arguments": classad.quote("verify_remote '$(src_file)'"),
                "should_transfer_files": "yes",
                "transfer_output_files": "metadata",
                "transfer_output_remaps": classad.quote(
                    "metadata = $(src_file_noslash).metadata"
                ),
                **shared_submit_descriptors(unique_id, requirements),
            }
        ),
        vars=verify_cmd_info,
        post=dags.Script(
            executable=THIS_FILE,
            arguments=["verify", "--json=verify_commands.json", "--fileid", "$JOB"],
        ),
    )

    return inner_dag
def test_config_file_has_right_contents(dag_dir):
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})

    dags.write_dag(dag, dag_dir)

    assert (
        "DAGMAN_MAX_JOBS_IDLE = 10"
        in (dag_dir / dags.CONFIG_FILE_NAME).read_text().splitlines()
    )
def test_dagman_job_attributes_with_one_attr():
    dag = dags.DAG(dagman_job_attributes={"foo": "bar"})
    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)

    assert "SET_JOB_ATTR foo = bar" in lines
def test_dot_config_default():
    dag = dags.DAG(dot_config=dags.DotConfig(Path("dag.dot")))
    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)

    assert "DOT dag.dot DONT-UPDATE OVERWRITE" in lines
def test_max_jobs_per_category_with_two_categories():
    dag = dags.DAG(max_jobs_by_category={"foo": 5, "bar": 10})
    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)

    assert "CATEGORY foo 5" in lines
    assert "CATEGORY bar 10" in lines
#!/usr/bin/env python

from pathlib import Path
import glob

import htcondor
from htcondor import dags

analysis_dag = dags.DAG()

# This is the "count words in chunk" step, which now lives in the sub-DAG.
# The split will have run by the time this code executes.
# Therefore, we can inspect the directory to find out how many chunks were created.

# Determine the number of files in this directory that match the pattern.
num_chunks = len(glob.glob("words_*.txt"))

count_words = analysis_dag.layer(
    name="count_words",
    submit_description=htcondor.Submit(
        {
            "executable": "count_words.py",
            "arguments": "$(word_set)",
            "transfer_input_files": "words_$(word_set).txt",
            "output": "count_words_$(word_set).out",
            "error": "count_words_$(word_set).err",
        }
    ),
    vars=[{"word_set": str(n)} for n in range(num_chunks)],
)
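# The sub-DAG input file still has to be written out for the top-level DAG to
# consume. A sketch of that final step (assumed; the file name must match the
# dag_file that the top-level DAG's child_subdag points at, i.e. analysis.dag):
this_dir = Path(__file__).parent
dags.write_dag(analysis_dag, this_dir, dag_file_name="analysis.dag")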
def make_inner_dag(
    direction: TransferDirection,
    requirements: Optional[str],
    transfer_cmd_info: T_CMD_INFO,
    verify_cmd_info: T_CMD_INFO,
    unique_id: Optional[str] = None,
    test_mode: bool = False,
):
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    inner_dag = dags.DAG(
        max_jobs_by_category={"TRANSFER_JOBS": 1} if test_mode else None
    )

    tof = [METADATA_FILE_NAME]
    tor = {METADATA_FILE_NAME: "$(flattened_name).metadata"}

    pull_tof = [SANDBOX_FILE_NAME]
    pull_tor = {SANDBOX_FILE_NAME: "$(flattened_name)"}

    shared_descriptors = shared_submit_descriptors(
        unique_id=unique_id, requirements=requirements
    )

    inner_dag.layer(
        name=direction,
        submit_description=htcondor.Submit(
            {
                "output": "$(flattened_name).out",
                "error": "$(flattened_name).err",
                "log": "transfer_file.log",
                "arguments": classad.quote(
                    "{} '$(remote_file)'".format(DIRECTION_TO_COMMAND[direction])
                ),
                "should_transfer_files": "yes",
                "transfer_input_files": "$(local_file)"
                if direction is TransferDirection.PUSH
                else "",
                "transfer_output_files": ", ".join(
                    tof + (pull_tof if direction is TransferDirection.PULL else [])
                ),
                "transfer_output_remaps": classad.quote(
                    " ; ".join(
                        "{} = {}".format(k, v)
                        for k, v in {
                            **tor,
                            # Only remap the sandbox file when pulling.
                            **(pull_tor if direction is TransferDirection.PULL else {}),
                        }.items()
                    )
                ),
                **shared_descriptors,
            }
        ),
        vars=transfer_cmd_info,
        post=dags.Script(
            executable=THIS_FILE,
            arguments=[
                Commands.POST_TRANSFER,
                "--cmd-info",
                TRANSFER_COMMANDS_FILE_NAME,
                "--key",
                "$JOB",
            ],
        ),
    )

    inner_dag.layer(
        name="verify",
        submit_description=htcondor.Submit(
            {
                "output": "$(flattened_name).out",
                "error": "$(flattened_name).err",
                "log": "verify_file.log",
                "arguments": classad.quote(
                    "{} '$(remote_file)'".format(Commands.GET_REMOTE_METADATA)
                ),
                "should_transfer_files": "yes",
                "transfer_output_files": ", ".join(tof),
                "transfer_output_remaps": classad.quote(
                    " ; ".join("{} = {}".format(k, v) for k, v in tor.items())
                ),
                **shared_descriptors,
            }
        ),
        vars=verify_cmd_info,
        post=dags.Script(
            executable=THIS_FILE,
            arguments=[
                Commands.POST_TRANSFER,
                "--cmd-info",
                VERIFY_COMMANDS_FILE_NAME,
                "--key",
                "$JOB",
                "--only-verify",
            ],
        ),
    )

    logging.info("Inner DAG shape:\n{}".format(inner_dag.describe()))

    return inner_dag
#!/usr/bin/env python

from pathlib import Path

import htcondor
from htcondor import dags

top_layer_dag = dags.DAG()

# This is the "split" step. It stays in the top-level DAG.
# Note that split_words.py no longer takes arguments. It determines the number
# of chunks itself.
split_words = top_layer_dag.layer(
    name="split_words",
    submit_description=htcondor.Submit(
        {
            "executable": "split_words.py",
            "transfer_input_files": "words.txt",
            "output": "split_words.out",
            "error": "split_words.err",
        }
    ),
    post=dags.Script(executable="make_analysis_dag.py"),
)

analysis_subdag = split_words.child_subdag(
    name="analysis",
    dag_file="analysis.dag",
)

# Write out the DAG.
# Now that we're going to have two DAG input files in this directory, we need
# to give them unique names.
this_dir = Path(__file__).parent
dags.write_dag(top_layer_dag, this_dir, dag_file_name="top_level.dag")
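# One way (not part of this script) to hand the written DAG to DAGMan from
# Python, using the standard bindings API; "force": 1 is an assumed option
# that overwrites files left over from a previous run:
dag_file = this_dir / "top_level.dag"
dag_submit = htcondor.Submit.from_dag(str(dag_file), {"force": 1})
schedd = htcondor.Schedd()
result = schedd.submit(dag_submit)
print("DAGMan job cluster is {}".format(result.cluster()))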
def test_max_jobs_per_category_with_one_category():
    dag = dags.DAG(max_jobs_by_category={"foo": 5})
    writer = DAGWriter(dag)
    lines = dagfile_lines(writer)

    assert "CATEGORY foo 5" in lines