def submit_outer_dag(
    working_dir: Path,
    source_dir: Path,
    dest_dir: Path,
    requirements: Optional[str] = None,
    unique_id: Optional[str] = None,
    test_mode: bool = False,
):
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    working_dir = working_dir.resolve()
    dest_dir = dest_dir.resolve()

    working_dir.mkdir(parents=True, exist_ok=True)
    dest_dir.mkdir(parents=True, exist_ok=True)

    transfer_manifest_path = dest_dir / "transfer_manifest.txt"

    outer_dag = make_outer_dag(
        dest_dir,
        requirements,
        source_dir,
        test_mode,
        transfer_manifest_path,
        unique_id,
        working_dir,
    )

    if requirements:
        (working_dir / "requirements.txt").write_text(requirements)

    outer_dag_file = dags.write_dag(outer_dag, dag_dir=working_dir, dag_file_name="outer.dag")

    dag_args = {"force": 1}
    sub = htcondor.Submit.from_dag(str(outer_dag_file), dag_args)

    with change_dir(working_dir):
        schedd = htcondor.Schedd()
        with schedd.transaction() as txn:
            return sub.queue(txn)
def submit_outer_dag(
    direction: TransferDirection,
    working_dir: Path,
    local_dir: Path,
    remote_dir: Path,
    requirements: Optional[str] = None,
    unique_id: Optional[str] = None,
    test_mode: bool = False,
) -> int:
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    working_dir = working_dir.resolve()
    local_dir = local_dir.resolve()

    working_dir.mkdir(parents=True, exist_ok=True)
    local_dir.mkdir(parents=True, exist_ok=True)

    outer_dag = make_outer_dag(
        direction=direction,
        local_dir=local_dir,
        remote_dir=remote_dir,
        working_dir=working_dir,
        requirements=requirements,
        unique_id=unique_id,
        test_mode=test_mode,
    )

    outer_dag_file = dags.write_dag(outer_dag, dag_dir=working_dir, dag_file_name=OUTER_DAG_NAME)

    sub = htcondor.Submit.from_dag(str(outer_dag_file), DAG_ARGS)

    with change_dir(working_dir):
        schedd = htcondor.Schedd()
        with schedd.transaction() as txn:
            return sub.queue(txn)
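# --- Usage sketch (not part of the original source) ---
# A minimal, hypothetical invocation of the direction-aware submit_outer_dag()
# above. The paths, requirements expression, and unique_id are illustrative
# placeholders; TransferDirection is assumed to be the enum imported by this
# module (it is also used by write_inner_dag() later in this collection).
if __name__ == "__main__":
    cluster_id = submit_outer_dag(
        direction=TransferDirection.PULL,
        working_dir=Path("./sync-working-dir"),      # hypothetical
        local_dir=Path("./local-data"),               # hypothetical
        remote_dir=Path("/remote/data"),              # hypothetical
        requirements='(OpSysAndVer == "CentOS7")',    # illustrative only
        unique_id="example-sync",                     # illustrative only
    )
    print(f"Submitted outer DAG as cluster {cluster_id}")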
def create_dag_file(dag_graph, dag_dir_name, information_dict):
    """
    :param dag_graph: a networkx DAG whose nodes are job names and whose edges encode
        the dependencies between jobs.
    :param dag_dir_name: directory for the DAG files. Will be overwritten.
    :param information_dict: a dictionary of dictionaries keyed by job name. Each
        information_dict[job_name] holds 'py_script_path' (the python script to run),
        'batch_parameters' (a list of VARS dictionaries, defaulting to [{}]), and
        'kargs_dict' (a dictionary of all parameters for running a job, as specified
        in the send_job scripts).
    :return: the path to the written DAG file.
    """
    nodes = list(nx.topological_sort(dag_graph))

    dag = dags.DAG()
    for job_name in nodes:
        job_submit = create_job_submit_format_from_python_script(
            information_dict[job_name]['py_script_path'],
            job_name,
            **information_dict[job_name]['kargs_dict'],
        )
        if 'batch_parameters' not in information_dict[job_name]:
            information_dict[job_name]['batch_parameters'] = [{}]
        layer = dag.layer(
            name=job_name,
            submit_description=job_submit,
            vars=information_dict[job_name]['batch_parameters'],
        )
        for parent in dag_graph.predecessors(job_name):
            layer.add_parents(dag.glob(parent))

    print(dag.describe())

    # Remove any previous DAG directory before recreating it, so the DAG is overwritten.
    shutil.rmtree(exec_dir + dag_dir_name, ignore_errors=True)
    os.mkdir(exec_dir + dag_dir_name)

    dag_file = dags.write_dag(dag, exec_dir + dag_dir_name)
    return dag_file
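# --- Usage sketch (hypothetical) ---
# A toy example of driving create_dag_file() above with a two-job chain.
# The script paths, job names, and VARS values are placeholders;
# create_job_submit_format_from_python_script and exec_dir are assumed to
# come from the surrounding module.
import networkx as nx

graph = nx.DiGraph()
graph.add_edge("prepare", "analyze")  # "analyze" runs after "prepare"

info = {
    "prepare": {"py_script_path": "prepare.py", "kargs_dict": {}},
    "analyze": {
        "py_script_path": "analyze.py",
        "kargs_dict": {},
        # two VARS dictionaries -> two jobs in the "analyze" layer
        "batch_parameters": [{"chunk": "0"}, {"chunk": "1"}],
    },
}

dag_file = create_dag_file(graph, "example_dag", info)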
def __init__(
    self,
    prior,
    ninj=100,
    maxamp=None,
    basedir=None,
    detector="AH1",
    submit=False,
    accountuser=None,
    accountgroup=None,
    getenv=False,
    sampler="dynesty",
    sampler_kwargs=None,
    freqrange=(10.0, 750.0),
    outputsnr=True,
    numba=False,
):
    if isinstance(prior, dict):
        self.prior = bilby.core.prior.PriorDict(dictionary=prior)
    else:
        raise TypeError("Prior must be a dictionary-type object")

    if ninj < 1:
        raise ValueError("A positive number of injections must be given")
    self.ninj = int(ninj)

    # set maximum amplitude if given
    self.maxamp = None
    if isinstance(maxamp, float):
        if maxamp > 0.0:
            self.maxamp = maxamp
        else:
            raise ValueError("Maximum amplitude must be positive")

    if basedir is not None:
        self.basedir = basedir
        self.makedirs(basedir)
    else:
        self.basedir = os.getcwd()

    # build output directory structure
    self.detector = detector
    if isinstance(self.detector, str):
        self.detector = [self.detector]
    if not isinstance(self.detector, list):
        raise TypeError("Detector must be a string or list of strings")

    # posterior sample results directory
    self.resultsdir = os.path.join(self.basedir, "results")
    self.makedirs(self.resultsdir)

    # create pulsar parameter files
    self.create_pulsars(freqrange=freqrange)

    # create dag configuration file
    self.accountuser = accountuser
    self.accountgroup = accountgroup
    self.getenv = getenv
    self.sampler = sampler
    self.sampler_kwargs = sampler_kwargs
    self.outputsnr = outputsnr
    self.numba = numba
    self.create_config()

    # create the DAG for cwinpy_pe jobs
    self.runner = pe_pipeline(config=self.config, build=False)

    # add PP plot creation DAG
    self.ppplots()

    # write out the DAG and submit files
    submitdir = os.path.join(self.basedir, "submit")
    if not os.path.exists(submitdir):
        os.makedirs(submitdir)
    dagname = "cwinpy_pe_pp_plot"
    dag_file = write_dag(self.runner.dag, submitdir, dag_file_name=f"{dagname}.dag")

    # submit the DAG if requested
    if submit:
        submit_dag(dag_file)
from pathlib import Path

import htcondor
from htcondor import dags

top_layer_dag = dags.DAG()

# This is the "split" step. It stays in the top-level DAG.
# Note that split_words.py no longer takes arguments. It determines the number
# of chunks itself.
split_words = top_layer_dag.layer(
    name="split_words",
    submit_description=htcondor.Submit({
        "executable": "split_words.py",
        "transfer_input_files": "words.txt",
        "output": "split_words.out",
        "error": "split_words.err",
    }),
    post=dags.Script(executable="make_analysis_dag.py"),
)

analysis_subdag = split_words.child_subdag(name="analysis", dag_file="analysis.dag")

# Write out the DAG.
# Now that we're going to have two DAG input files in this directory, we need
# to give them unique names.
this_dir = Path(__file__).parent
dags.write_dag(top_layer_dag, this_dir, dag_file_name="top_level.dag")

print(f"Wrote DAG files to {this_dir}")
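# --- Submission sketch (not part of the original example) ---
# One way the top-level DAG written above could be submitted, mirroring the
# Submit.from_dag() pattern used in the submit_outer_dag() functions earlier
# in this collection. Changing into this_dir first keeps the relative paths
# in the generated submit descriptions valid; the {"force": 1} option is
# illustrative only.
import os

os.chdir(this_dir)
dag_submit = htcondor.Submit.from_dag("top_level.dag", {"force": 1})
schedd = htcondor.Schedd()
with schedd.transaction() as txn:
    cluster_id = dag_submit.queue(txn)
print(f"DAGMan job cluster is {cluster_id}")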
def write_inner_dag(
    direction: TransferDirection,
    remote_prefix: Path,
    remote_manifest: Path,
    local_prefix: Path,
    requirements=None,
    test_mode: bool = False,
    unique_id=None,
):
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    logging.info(
        "Generating SUBDAG for transfer of %s->%s",
        remote_prefix,
        local_prefix,
    )

    logging.info("Parsing remote file manifest...")
    remote_files = parse_file_manifest(remote_prefix, remote_manifest)

    logging.info("Generating local file manifest...")
    local_manifest_path = Path(LOCAL_MANIFEST_FILE_NAME)
    create_file_manifest(local_prefix, local_manifest_path)
    local_files = parse_file_manifest(local_prefix, local_manifest_path)

    transfer_manifest_path = local_prefix / TRANSFER_MANIFEST_FILE_NAME
    transfer_manifest_path.parent.mkdir(parents=True, exist_ok=True)
    transfer_manifest_path.touch(exist_ok=True)

    # Never transfer the transfer manifest
    transfer_manifest_file = transfer_manifest_path.relative_to(local_prefix)
    local_files.pop(transfer_manifest_file, None)
    remote_files.pop(transfer_manifest_file, None)

    if direction is TransferDirection.PULL:
        src_files, dest_files = remote_files, local_files
    else:  # this is a PUSH
        src_files, dest_files = local_files, remote_files

    files_to_transfer = {
        fname for fname, size in src_files.items() if size != dest_files.get(fname, -1)
    }

    # TODO: rethink this logic for push vs. pull
    # Check for files that we have already verified, and do not verify them again.
    files_verified = set()
    for entry, _ in read_manifest(transfer_manifest_path):
        if not isinstance(entry, TransferComplete):
            continue
        files_verified.add(entry.name)

    files_to_verify = set()
    for fname in remote_files:
        if fname in files_to_transfer:
            continue
        if fname not in files_verified:
            files_to_verify.add(fname)

    files_to_transfer = sorted(files_to_transfer)
    files_to_verify = sorted(files_to_verify)

    if direction is TransferDirection.PULL:
        ensure_local_dirs_exist(local_prefix, files_to_transfer)

    transfer_cmd_info = make_cmd_info(
        direction, files_to_transfer, remote_prefix, local_prefix, transfer_manifest_path
    )
    verify_cmd_info = make_cmd_info(
        direction, files_to_verify, remote_prefix, local_prefix, transfer_manifest_path
    )

    write_cmd_info(transfer_cmd_info, Path(TRANSFER_COMMANDS_FILE_NAME))
    write_cmd_info(verify_cmd_info, Path(VERIFY_COMMANDS_FILE_NAME))

    dags.write_dag(
        make_inner_dag(
            direction=direction,
            requirements=requirements,
            transfer_cmd_info=transfer_cmd_info,
            verify_cmd_info=verify_cmd_info,
            unique_id=unique_id,
            test_mode=test_mode,
        ),
        dag_dir=Path.cwd(),  # this will be the working dir of the outer DAG
        dag_file_name=INNER_DAG_NAME,
    )

    bytes_to_transfer = sum(src_files[fname] for fname in files_to_transfer)
    bytes_to_verify = sum(src_files[fname] for fname in files_to_verify)

    with transfer_manifest_path.open(mode="a") as f:
        SyncRequest(
            direction=direction,
            remote_prefix=remote_prefix,
            files_at_source=len(src_files),
            files_to_transfer=len(files_to_transfer),
            bytes_to_transfer=bytes_to_transfer,
            files_to_verify=len(files_to_verify),
            bytes_to_verify=bytes_to_verify,
            timestamp=timestamp(),
        ).write_entry_to(f)

        for fname in files_to_transfer:
            TransferRequest(name=fname, size=src_files[fname]).write_entry_to(f)
        for fname in files_to_verify:
            VerifyRequest(name=fname, size=src_files[fname]).write_entry_to(f)
def write_inner_dag(
    source_prefix: Path,
    source_manifest: Path,
    dest_prefix: Path,
    requirements=None,
    test_mode: bool = False,
    unique_id=None,
):
    # Only import htcondor.dags submit-side
    import htcondor.dags as dags

    src_files = parse_manifest(source_prefix, source_manifest, "Source")

    generate_file_listing(dest_prefix, Path("destination_manifest.txt"))
    dest_files = parse_manifest(dest_prefix, Path("destination_manifest.txt"), "Destination")

    files_to_xfer = set()
    for fname in src_files:
        if src_files[fname] != dest_files.get(fname, -1):
            files_to_xfer.add(fname)

    transfer_manifest_path = Path(os.path.join(dest_prefix, "transfer_manifest.txt"))
    transfer_manifest_path.parent.mkdir(parents=True, exist_ok=True)
    transfer_manifest_path.touch(exist_ok=True)

    # Check for files that we have already verified, and do not verify them again.
    files_verified = set()
    with transfer_manifest_path.open(mode="r") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            info = line.split()
            if info[0] != "TRANSFER_VERIFIED":
                continue
            info = json.loads(" ".join(info[1:]))
            if not valid_metadata(info):
                continue
            files_verified.add(info["name"])

    files_to_verify = set()
    for fname in src_files:
        if fname in files_to_xfer:
            continue
        if fname not in files_verified:
            files_to_verify.add(fname)

    ensure_destination_dirs_exist(dest_prefix, files_to_xfer)

    xfer_cmd_info = make_cmd_info(files_to_xfer, source_prefix, dest_prefix, transfer_manifest_path)
    verify_cmd_info = make_cmd_info(files_to_verify, source_prefix, dest_prefix, transfer_manifest_path)

    write_cmd_info(xfer_cmd_info, Path("xfer_commands.json"))
    write_cmd_info(verify_cmd_info, Path("verify_commands.json"))

    inner_dag = make_inner_dag(requirements, xfer_cmd_info, verify_cmd_info, unique_id, test_mode)

    print(inner_dag.describe())

    dags.write_dag(inner_dag, dag_dir=Path.cwd(), dag_file_name="inner.dag")

    bytes_to_transfer = sum(src_files[fname] for fname in files_to_xfer)
    bytes_to_verify = sum(src_files[fname] for fname in files_to_verify)

    with transfer_manifest_path.open(mode="a") as f:
        f.write(
            "SYNC_REQUEST {} files_at_source={} files_to_transfer={} bytes_to_transfer={} "
            "files_to_verify={} bytes_to_verify={} timestamp={}\n".format(
                source_prefix,
                len(src_files),
                len(files_to_xfer),
                bytes_to_transfer,
                len(files_to_verify),
                bytes_to_verify,
                time.time(),
            )
        )
        for fname in files_to_xfer:
            info = {"name": fname, "size": src_files[fname]}
            f.write("TRANSFER_REQUEST {}\n".format(json.dumps(info)))
        for fname in files_to_verify:
            info = {"name": fname, "size": src_files[fname]}
            f.write("VERIFY_REQUEST {}\n".format(json.dumps(info)))
def test_config_file_has_right_contents(dag_dir):
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})

    dags.write_dag(dag, dag_dir)

    assert (
        "DAGMAN_MAX_JOBS_IDLE = 10"
        in (dag_dir / dags.CONFIG_FILE_NAME).read_text().splitlines()
    )
def test_config_file_gets_written_if_config_given(dag_dir):
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})

    dags.write_dag(dag, dag_dir)

    assert (dag_dir / dags.CONFIG_FILE_NAME).exists()
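# --- Illustrative sketch (assumptions labelled) ---
# A standalone version of what the two tests above exercise: passing
# dagman_config to dags.DAG() makes write_dag() emit a DAGMan config file
# next to the .dag file. The temporary directory is used here only so the
# sketch is self-contained.
from pathlib import Path
import tempfile

from htcondor import dags

with tempfile.TemporaryDirectory() as tmp:
    dag_dir = Path(tmp)
    dag = dags.DAG(dagman_config={"DAGMAN_MAX_JOBS_IDLE": 10})
    dags.write_dag(dag, dag_dir)
    # print the generated DAGMan configuration file
    print((dag_dir / dags.CONFIG_FILE_NAME).read_text())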
)

# This is the "combine the counts from each chunk" step.
# Since it can't run until all the chunks are done, we create it as a child
# of the previous step.
# Its input files are all of the output files from the previous step, which
# is easy in this case because we know the naming scheme.
combine_counts = count_words.child_layer(
    name="combine_counts",
    submit_description=htcondor.Submit({
        "executable": "combine_counts.py",
        "transfer_input_files": ", ".join(f"counts_{n}.txt" for n in range(NUM_CHUNKS)),
        "output": "combine_counts.out",
        "error": "combine_counts.err",
    }),
)

# We're done setting up the DAG, so we can write it out.
# The DAG input file itself as well as all of the submit descriptions will
# be written out to the specified directory.
# Here, we just write it out to the same directory that this file is in.
# If you write it out to a different directory, you may need to be careful
# about filepaths in your submit descriptions!
this_dir = Path(__file__).parent
dags.write_dag(diamond, this_dir)

print(f"Wrote DAG files to {this_dir}")
    }),
    vars=[{"word_set": str(n)} for n in range(num_chunks)],
)

# This is the "combine the counts from each chunk" step.
combine_counts = count_words.child_layer(
    name="combine_counts",
    submit_description=htcondor.Submit({
        "executable": "combine_counts.py",
        "transfer_input_files": ", ".join(f"counts_{n}.txt" for n in range(num_chunks)),
        "output": "combine_counts.out",
        "error": "combine_counts.err",
    }),
)

# We're done setting up the DAG, so we can write it out.
# The DAG input file itself as well as all of the submit descriptions will
# be written out to the specified directory.
# Here, we just write it out to the same directory that this file is in.
# If you write it out to a different directory, you may need to be careful
# about filepaths in your submit descriptions!
this_dir = Path(__file__).parent
dags.write_dag(analysis_dag, this_dir, dag_file_name="analysis.dag")

print(f"Wrote DAG files to {this_dir}")