def _run_job(self, job: Job, params_it: Iterable[ParamSet]) -> Iterator[Tuple[ParamSet, Result]]:
    # TODO: introduce public API of Process to get working_directory
    if job.process._working_directory is not None:
        raise Exception('HTCondor does not support setting the working_directory on Process')

    with ExitStack() as stack:
        cluster_generator = _JobClusterGenerator(self, job, params_it)
        stack.enter_context(cluster_generator)

        # submit one proc per parameter set; the itemdata rows come from the
        # cluster generator
        submit = Submit()
        with self._schedd.transaction() as txn:
            submit_result = submit.queue_with_itemdata(txn, itemdata=iter(cluster_generator))

        # remove the cluster from the queue when the stack unwinds, whether
        # we finish normally or raise below
        stack.callback(
            self._schedd.act,
            JobAction.Remove,
            f'ClusterId == {submit_result.cluster()}',
        )

        # poll the schedd until every proc in the cluster has completed
        job_states: Dict[int, _JobState] = {}
        for sleep_time in _get_poll_sleep_times():
            sleep(sleep_time)

            query_result = self._schedd.xquery(
                requirements=f'ClusterId == {submit_result.cluster()}',
                projection=_JobState.projection(),
            )
            job_states.clear()
            for job_state_ad in query_result:
                job_state = _JobState.from_class_ad(job_state_ad)
                job_states[job_state.proc_id] = job_state

            counts = _StatusCounts()
            counts.add_jobs(job_states.values())
            print(counts)

            if counts.completed == counts.total:
                break

        # collect results, raising if any proc was killed by a signal
        results: List[Tuple[ParamSet, Result]] = []
        for proc_id, process in enumerate(cluster_generator.processes):
            job_state = job_states[proc_id]
            if job_state.exit_by_signal:
                raise _ProcessFailedError(
                    f'Process exited due to receiving signal {job_state.exit_signal}',
                )
            if job_state.exit_code is None:
                raise Exception('Exit code received from HTCondor is None')
            result = process.result(job_state.exit_code)
            self._check_for_failure(job, result, process.params)
            results.append((process.params, result))

        self._cleanup_handlers += cluster_generator.cleanup_handlers
        return iter(results)
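# The polling loop above iterates over _get_poll_sleep_times(), which is not
# defined in this section.  A minimal sketch of what such a generator could
# look like, assuming an exponential backoff capped at 60 seconds (the initial
# delay, growth factor, and cap are illustrative, not taken from the source):

from typing import Iterator


def _get_poll_sleep_times_sketch() -> Iterator[float]:
    delay = 1.0  # first poll after one second
    while True:
        yield delay
        delay = min(2.0 * delay, 60.0)  # back off, but still poll once a minute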
import os

from htcondor import Schedd, Submit


def submit_dag(dag_file):
    """
    Submit an HTCondor DAG (see
    https://htcondor.readthedocs.io/en/latest/apis/python-bindings/tutorials/DAG-Creation-And-Submission.html#Submit-the-DAG-via-the-Python-bindings).

    Parameters
    ----------
    dag_file: str, Path
        Path to the DAG file.
    """

    # create the submit description for the DAG
    dag_submit = Submit.from_dag(str(dag_file), {"force": 1})

    dagdir = os.path.split(str(dag_file))[0]

    # remember the current directory, then move into the DAG directory
    cwd = os.getcwd()
    os.chdir(dagdir)

    try:
        # connect to the local schedd and queue the DAGMan job
        schedd = Schedd()
        with schedd.transaction() as txn:
            _ = dag_submit.queue(txn)
    finally:
        # switch back to the original directory, even if submission failed
        os.chdir(cwd)
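# A hedged usage sketch for submit_dag, assuming the DAG file was written with
# htcondor.dags (the DAG object and directory below are illustrative):
#
#     from htcondor import dags
#
#     dag_file = dags.write_dag(my_dag, "/home/user/analysis")
#     submit_dag(dag_file)
#
# The {"force": 1} option passed to Submit.from_dag() mirrors
# condor_submit_dag -force, overwriting DAGMan files left over from a previous
# submission; queueing from inside the DAG's directory keeps DAGMan's rescue
# and log files next to the DAG file itself.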
def generate_submit_job(self, submitoptions=None):
    """
    Generate a submit object.

    Parameters
    ----------
    submitoptions: dict
        A dictionary containing any additional options for the submit file.
    """

    # avoid a mutable default argument
    if submitoptions is None:
        submitoptions = {}

    # dictionary to contain the specific submit options
    submit = {}
    submit.update(copy.deepcopy(self.submit_options))
    submit.update(copy.deepcopy(submitoptions))

    # add arguments
    submit["arguments"] = "$(ARGS)"

    # add requirements
    if isinstance(self.requirements, list):
        if len(self.requirements) > 0:
            submit["requirements"] = " && ".join(self.requirements)
    else:
        submit["requirements"] = self.requirements

    return Submit(submit)
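# A hedged usage sketch for generate_submit_job, assuming self.submit_options
# already holds the base description (executable, log paths, etc.); the extra
# request_memory option is illustrative:
#
#     submit = job.generate_submit_job({"request_memory": "2GB"})
#     schedd = htcondor.Schedd()
#     with schedd.transaction() as txn:
#         submit.queue(txn)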
from typing import Dict, List

import htcondor


def execute_submit(submit_object: htcondor.Submit, itemdata: List[Dict[str, str]]) -> int:
    """
    Execute a map via the scheduler defined by the settings.
    Return the HTCondor cluster ID of the map's jobs.
    """
    schedd = get_schedd()
    with schedd.transaction() as txn:
        submit_result = submit_object.queue_with_itemdata(
            txn,
            1,
            iter(itemdata),
        )

    return submit_result.cluster()
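# A hedged usage sketch for execute_submit: one job is queued per itemdata
# row, and each row's keys become $(...) substitution macros in the submit
# description.  The description and rows here are illustrative:
#
#     sub = htcondor.Submit({
#         "executable": "/bin/echo",
#         "arguments": "$(word)",
#         "output": "echo-$(ProcId).out",
#     })
#     cluster_id = execute_submit(sub, [{"word": "hello"}, {"word": "world"}])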
def ppplots(self):
    """
    Set up job to create PP plots.
    """

    from htcondor import Submit

    # get the executable
    jobexec = shutil.which("cwinpy_pe_generate_pp_plots")

    # set the log directory
    logdir = os.path.join(os.path.abspath(self.basedir), "log")
    self.makedirs(logdir)

    subdict = {
        "universe": "local",
        "executable": jobexec,
        "getenv": self.getenv,
        "arguments": "$(ARGS)",
        "log": os.path.join(logdir, "cwinpy_pe_pp_plots.log"),
        "error": os.path.join(logdir, "cwinpy_pe_pp_plots.err"),
        "output": os.path.join(logdir, "cwinpy_pe_pp_plots.out"),
    }

    if self.accountgroup is not None:
        subdict["accounting_group"] = self.accountgroup
    if self.accountuser is not None:
        subdict["accounting_group_user"] = self.accountuser

    submit = Submit(subdict)

    jobargs = "--path '{}' ".format(os.path.join(self.basedir, "results", "*", "*"))
    jobargs += "--output {} ".format(os.path.join(self.basedir, "ppplot.png"))
    if self.outputsnr:
        jobargs += "--snrs "
    vars = [{"ARGS": jobargs}]

    # add a child layer to the DAG
    nodes = self.runner.dag.select(lambda x: x.name.startswith("cwinpy_pe"))
    nodes.child_layer(
        name="cwinpy_pe_pp_plots",
        submit_description=submit,
        vars=vars,
    )
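# Note on the DAG wiring above: select() gathers the existing nodes whose
# names start with "cwinpy_pe", and child_layer() adds the PP-plot node as a
# child of all of them, so the plot job only runs after every parameter
# estimation job has finished.  A minimal htcondor.dags sketch of the same
# parent/child pattern (the names and submit descriptions are illustrative):
#
#     from htcondor import dags
#
#     dag = dags.DAG()
#     pe = dag.layer(name="cwinpy_pe", submit_description=pe_submit, vars=pe_vars)
#     pe.child_layer(name="cwinpy_pe_pp_plots", submit_description=pp_submit)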
import os

from htcondor import Submit


def condor_submit(txn, priority: int) -> int:
    directory = os.getcwd()
    # get_all_files() is a project helper that lists the job's input files
    input_files = get_all_files(directory)

    if 'run_respdiff.sh' in input_files:
        executable = 'run_respdiff.sh'
        output_files = [
            'j$(Cluster).$(Process)_docker.txt',
            'j$(Cluster).$(Process)_report.json',
            'j$(Cluster).$(Process)_report.diffrepro.json',
            'j$(Cluster).$(Process)_report.txt',
            'j$(Cluster).$(Process)_report.diffrepro.txt',
            'j$(Cluster).$(Process)_histogram.tar.gz',
            'j$(Cluster).$(Process)_logs.tar.gz',
        ]
        if 'stats.json' in input_files:
            output_files.extend([
                'j$(Cluster).$(Process)_report.noref.json',
                'j$(Cluster).$(Process)_report.noref.txt',
                'j$(Cluster).$(Process)_report.diffrepro.noref.json',
                'j$(Cluster).$(Process)_report.diffrepro.noref.txt',
                # 'j$(Cluster).$(Process)_dnsviz.json.gz',
                # 'j$(Cluster).$(Process)_report.noref.dnsviz.json',
                # 'j$(Cluster).$(Process)_report.noref.dnsviz.txt',
            ])
    elif 'run_resperf.sh' in input_files:
        executable = 'run_resperf.sh'
        output_files = [
            'j$(Cluster).$(Process)_exitcode',
            'j$(Cluster).$(Process)_docker.txt',
            'j$(Cluster).$(Process)_resperf.txt',
            'j$(Cluster).$(Process)_logs.tar.gz',
        ]
    elif 'run_distrotest.sh' in input_files:
        executable = 'run_distrotest.sh'
        output_files = [
            'j$(Cluster).$(Process)_exitcode',
            'j$(Cluster).$(Process)_vagrant.log.txt',
        ]
    else:
        raise RuntimeError(
            "The provided directory doesn't look like a respdiff/resperf job. "
            "{}/run_*.sh is missing!".format(directory))

    # create the batch name from the directory structure
    commit_dir_path, test_case = os.path.split(directory)
    _, commit_dir = os.path.split(commit_dir_path)
    batch_name = commit_dir + '_' + test_case

    submit = Submit({
        'priority': str(priority),
        'executable': executable,
        'arguments': '$(Cluster) $(Process)',
        'error': 'j$(Cluster).$(Process)_stderr.txt',
        'output': 'j$(Cluster).$(Process)_stdout.txt',
        'log': 'j$(Cluster).$(Process)_log.txt',
        'jobbatchname': batch_name,
        'should_transfer_files': 'YES',
        'when_to_transfer_output': 'ON_EXIT',
        'transfer_input_files': ', '.join(input_files),
        'transfer_output_files': ', '.join(output_files),
    })
    return submit.queue(txn)
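# A hedged usage sketch for condor_submit; the directory layout and priority
# are illustrative.  The function reads its inputs from the current working
# directory, so chdir into the test-case directory before opening the
# transaction:
#
#     import htcondor
#
#     os.chdir('/jobs/deadbeef/respdiff-udp')
#     schedd = htcondor.Schedd()
#     with schedd.transaction() as txn:
#         cluster_id = condor_submit(txn, priority=5)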