def _run_job(self, job: Job, params_it: Iterable[ParamSet]) -> Iterator[Tuple[ParamSet, Result]]:
    """Submit one cluster of HTCondor jobs for *job*, poll until all procs
    complete, and yield each proc's ``(params, result)`` pair.

    Raises:
        Exception: if a working directory is set on the Process (unsupported
            on HTCondor), or if HTCondor reports a ``None`` exit code.
        _ProcessFailedError: if any proc exited due to a signal.
    """
    # TODO: introduce public API of Process to get working_directory
    if job.process._working_directory is not None:
        raise Exception('HTCondor does not support setting the working_directory on Process')
    with ExitStack() as stack:
        # The generator yields one itemdata dict per ParamSet; entering it as a
        # context manager lets it manage any per-proc resources it creates.
        cluster_generator = _JobClusterGenerator(self, job, params_it)
        stack.enter_context(cluster_generator)
        submit = Submit()
        with self._schedd.transaction() as txn:
            submit_result = submit.queue_with_itemdata(txn, itemdata=iter(cluster_generator))
        # Ensure the whole cluster is removed from the queue when this scope
        # unwinds — on success as well as on error, so no jobs are orphaned.
        stack.callback(
            self._schedd.act,
            JobAction.Remove,
            f'ClusterId == {submit_result.cluster()}',
        )
        # Latest known state of each proc in the cluster, keyed by ProcId.
        job_states: Dict[int, _JobState] = {}
        # Poll the schedd on a (presumably back-off) schedule until every proc
        # in the cluster reports completed.
        for sleep_time in _get_poll_sleep_times():
            sleep(sleep_time)
            query_result = self._schedd.xquery(
                requirements = f'ClusterId == {submit_result.cluster()}',
                projection = _JobState.projection(),
            )
            # Rebuild the snapshot from scratch each poll so procs that left
            # the queue do not linger with stale state.
            job_states.clear()
            for job_state_ad in query_result:
                job_state = _JobState.from_class_ad(job_state_ad)
                job_states[job_state.proc_id] = job_state
            counts = _StatusCounts()
            counts.add_jobs(job_states.values())
            # NOTE(review): progress goes to stdout via print — consider the
            # logging module if this runs inside a larger application.
            print(counts)
            if counts.completed == counts.total:
                break
        results: List[Tuple[ParamSet, Result]] = []
        # Procs are numbered 0..N-1 in submission order, matching the order in
        # which cluster_generator produced its processes.
        for proc_id, process in enumerate(cluster_generator.processes):
            job_state = job_states[proc_id]
            if job_state.exit_by_signal:
                raise _ProcessFailedError(
                    f'Process exited due to receiving signal {job_state.exit_signal}',
                )
            if job_state.exit_code is None:
                raise Exception('Exit code received from HTCondor is None')
            result = process.result(job_state.exit_code)
            # May raise if the job is configured to fail fast on bad results.
            self._check_for_failure(job, result, process.params)
            results.append((process.params, result))
        # Hand the generator's deferred cleanup work to the owner of this run.
        self._cleanup_handlers += cluster_generator.cleanup_handlers
        # Materialized before the ExitStack unwinds (which removes the cluster),
        # then returned as an iterator to satisfy the declared interface.
        return iter(results)
def execute_submit(submit_object: htcondor.Submit, itemdata: List[Dict[str, str]]) -> int:
    """
    Submit a map's jobs through the scheduler configured in the settings.

    Returns the HTCondor cluster ID assigned to the submitted jobs.
    """
    with get_schedd().transaction() as transaction:
        queued = submit_object.queue_with_itemdata(transaction, 1, iter(itemdata))
    return queued.cluster()