def update(self):
    # What's done is done
    if self.status == 'JOB_TERMINATED':
        return

    # Updating works by opening the log file,
    # looping through and only keeping the last event,
    # which really tells us what's going on.
    # This is not very efficient, so an alternative
    # implementation would be welcome.
    jel = htcondor.JobEventLog(self._log)
    first = None
    try:
        for event in jel.events(stop_after=0):
            if not first:
                first = event
            latest = event
        try:
            self._code = latest["ReturnValue"]
        except KeyError:
            self._code = "-"
        self.status = str(htcondor.JobEventType.values[latest.type])
        self.cluster = latest.cluster
        self.runtime = latest.timestamp - first.timestamp
    except OSError:
        self._code = "-"
        self.status = "NOPARSE"
        self.cluster = "-"
        self.runtime = -1
    finally:
        jel.close()
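# The comment above invites a more efficient alternative. A minimal hedged
# sketch: keep one persistent JobEventLog reader on the instance and only
# consume events appended since the previous call, the same pattern several
# of the snippets below use. The names `_events`, `_latest`, and
# `update_incremental` are illustrative, not from the original class.
def update_incremental(self):
    if getattr(self, '_events', None) is None:
        # events(stop_after=0) returns a non-blocking iterator that
        # remembers its position in the log between iterations
        self._events = htcondor.JobEventLog(self._log).events(stop_after=0)
    for event in self._events:
        # only events not yet seen are yielded on each call
        self._latest = event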
def __init__(self, function, input_file, working_dir=None):
    self.function = function
    self.input_file = Path(input_file)

    if working_dir is None:
        working_dir = Path.cwd()

    self.uid = uuid.uuid4()

    self.working_dir = working_dir
    self.working_dir.mkdir(parents=True, exist_ok=True)

    self._event_log_path = self.working_dir / f'{self.uid}.log'
    self._event_log_path.touch(exist_ok=True)
    self._events = htcondor.JobEventLog(self._event_log_path.as_posix()).events(0)

    self._output_file_path = self.working_dir / f'{self.uid}.output'

    self._state = TaskState.Unsubmitted
    self._jobid = None

    TASKS.add(self)
def update(self):
    if self.status == 'JOB_TERMINATED':
        return

    # For updating, open the log file
    # and look at the status of the
    # last event.
    jel = htcondor.JobEventLog(self._log)
    first = None
    try:
        for event in jel.events(stop_after=0):
            if not first:
                first = event
            latest = event
        try:
            self.code = latest['ReturnValue']
        except KeyError:
            self.code = '-'
        self.status = str(htcondor.JobEventType.values[latest.type])
        self.cluster = latest.cluster
        self.runtime = latest.timestamp - first.timestamp
    except OSError:
        self.code = '-'
        self.status = 'NOPARSE'
        self.cluster = '-'
        self.runtime = -1
    finally:
        jel.close()
def _update(self):
    logger.debug(f"triggered status update for handle {self._handle}")

    if self._events is None:
        logger.debug(
            f"looking for event log for handle {self._handle} at {self._event_log_path}"
        )
        self._events = htcondor.JobEventLog(
            self._event_log_path.as_posix()).events(0)
        logger.debug(
            f"initialized event log reader for handle {self._handle}, targeting {self._event_log_path}"
        )

    for event in self._events:
        if event.cluster != self._clusterid:
            continue

        new_status = JOB_EVENT_STATUS_TRANSITIONS.get(event.type, None)
        if new_status is not None:
            key = event.proc - self._offset

            # update counts
            old_status = self._data[key]
            self._counts[old_status] -= 1
            self._counts[new_status] += 1

            # set new status on individual job
            self._data[key] = new_status

    logger.debug(f"new status counts for {self._handle}: {self._counts}")
def wait_log(self, ulog):
    """Wait for a job to finish."""
    data = {}
    if StrictVersion(VERSION) < StrictVersion('8.7.10'):
        # old bindings: poll the user log via the legacy read_events() API
        fp = open(ulog)
        events = htcondor.read_events(fp)
        while True:
            try:
                r = next(events)
            except StopIteration:
                log.debug("no event yet; sleeping")
                time.sleep(2.2)
            else:
                self.process_event(r, data)
                log.debug(data)
                if self._is_terminal(data):
                    break
    else:
        # new bindings: block on the event log until a terminal state is seen
        for r in htcondor.JobEventLog(ulog).events(None):
            self.process_event(r, data)
            log.debug(data)
            if self._is_terminal(data):
                break
    log.debug("all jobs terminal")
def test_correct_events_read(self, logfile):
    count = 0
    jel = htcondor.JobEventLog(logfile)
    for event in jel.events(stop_after=0):
        assert compareEvent(event, count)
        count += 1
    assert count == 39
def test_submit_success(self, test_dir, submit_success):
    assert submit_success.stderr == "Job 1 was submitted."

    jel = htcondor.JobEventLog((test_dir / "helloworld.log").as_posix())

    # Wait for the job to finish by watching its event log
    for event in jel.events(stop_after=None):
        if event.type == htcondor.JobEventType.JOB_TERMINATED:
            break

    assert Path(test_dir / "helloworld.out").read_text() == "Hello, World!\n"
def test_enter_and_exit(self, logfile):
    with htcondor.JobEventLog(logfile) as jel:
        for i in range(0, 30):
            event = next(jel)
        # the log is exhausted, so another next() must raise StopIteration
        try:
            event = next(jel)
            assert False
        except StopIteration:
            pass
def test_close(self, logfile):
    with htcondor.JobEventLog(logfile) as jel:
        e = next(jel)
        jel.close()
        try:
            e = next(jel)
            assert False
        except StopIteration:
            pass
def write_and_read_back_event(event):
    with open('test_toe_exit_info.event', mode="w") as f:
        f.write(event)
        f.write('...\n')
    jel = htcondor.JobEventLog('test_toe_exit_info.event')
    os.unlink('test_toe_exit_info.event')
    for e in jel.events(stop_after=0):
        return str(e)
def read_events(self):
    if self._event_reader is None:
        self._event_reader = htcondor.JobEventLog(
            self._event_log_path.as_posix()).events(0)

    for event in self._event_reader:
        if event.cluster != self._clusterid:
            continue

        self.events.append(event)

        yield event
def watch_events(self) -> None:
    if self.events is None:
        self.events = htcondor.JobEventLog(self.event_log.as_posix())

    for event in self.events:
        text = str(event).rstrip()
        click.secho(text, err=True, fg=JOB_EVENT_TO_COLOR.get(event.type, "white"))

        if event.type in BREAK_ON_JOB_EVENTS:
            break
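# The two lookup tables used above are defined elsewhere in that project;
# a hedged sketch of plausible definitions, for illustration only:
JOB_EVENT_TO_COLOR = {
    htcondor.JobEventType.SUBMIT: "cyan",
    htcondor.JobEventType.EXECUTE: "green",
    htcondor.JobEventType.JOB_HELD: "red",
    htcondor.JobEventType.JOB_TERMINATED: "white",
}

# event types after which watching should stop (the job is finished)
BREAK_ON_JOB_EVENTS = {
    htcondor.JobEventType.JOB_TERMINATED,
    htcondor.JobEventType.JOB_ABORTED,
}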
def equal_priority_execute_events(submit_equal_priority_jobs):
    """
    Simple approach to retrieving execute events: open the job event log,
    iterate over the events in order, and add all execute events to a list.
    """
    jel = htcondor.JobEventLog("scheduler_priority-equal.log")
    execute_events = []
    for event in jel.events(0):
        if event.type == htcondor.JobEventType.EXECUTE:
            execute_events.append(event)
    return execute_events
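# A hedged sketch of how a test might consume the fixture above: with equal
# priorities, jobs would be expected to start in submission order, so the
# execute events should arrive in ascending proc id. The test name and the
# assertion are illustrative assumptions, not taken from the original suite.
def test_equal_priority_jobs_execute_in_submit_order(equal_priority_execute_events):
    procs = [event.proc for event in equal_priority_execute_events]
    assert procs == sorted(procs)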
def read_events(self) -> Iterator[htcondor.JobEvent]:
    """Yield all un-read events in the event log."""
    if self._event_reader is None:
        self._event_reader = htcondor.JobEventLog(
            self._event_log_path.as_posix()).events(0)

    for event in self._event_reader:
        if event.cluster != self._clusterid:
            continue

        self.events.append(event)

        yield event
def watch_events(self) -> None:
    if self.events is None:
        self.events = htcondor.JobEventLog(self.event_log.as_posix())

    for event in self.events:
        text = str(event).rstrip()
        if event.type in (htcondor.JobEventType.JOB_HELD,
                          htcondor.JobEventType.JOB_TERMINATED):
            click.secho(text, err=True, fg="red")
        elif event.type is htcondor.JobEventType.JOB_ABORTED:
            click.secho(text, err=True, fg="white")
            break
        else:
            click.secho(text, err=True, fg="white")
def parseCondorLog(cacheDoc):
    """
    Do all the real work and update the checkpoints, nodes, and nodeMap dictionaries.
    Takes as input a cacheDoc dictionary with keys jobLogCheckpoint,
    fjrParseResCheckpoint, nodes, nodeMap and returns the same dictionary
    with updated information.
    """
    jobLogCheckpoint = cacheDoc['jobLogCheckpoint']
    fjrParseResCheckpoint = cacheDoc['fjrParseResCheckpoint']
    nodes = cacheDoc['nodes']
    nodeMap = cacheDoc['nodeMap']

    if jobLogCheckpoint:
        # resume log parsing where we left off; pickle files must be read in binary mode
        with open((LOG_PARSING_POINTERS_DIR + jobLogCheckpoint), 'rb') as f:
            jel = pickle.load(f)
    else:
        # parse the log from the beginning
        jel = htcondor.JobEventLog('job_log')

    parseJobLog(jel, nodes, nodeMap)

    # save the jel object in a pickle file made unique by a timestamp
    newJelPickleName = 'jel-%d.pkl' % int(time.time())
    if not os.path.exists(LOG_PARSING_POINTERS_DIR):
        os.mkdir(LOG_PARSING_POINTERS_DIR)
    with open((LOG_PARSING_POINTERS_DIR + newJelPickleName), 'wb') as f:
        pickle.dump(jel, f)
    newJobLogCheckpoint = newJelPickleName

    for fn in glob.glob("node_state*"):
        level = re.match(r'(\w+)(?:\.(\w+))?', fn).group(2)
        with open(fn, 'r') as nodeState:
            parseNodeStateV2(nodeState, nodes, level)

    try:
        errorSummary, newFjrParseResCheckpoint = summarizeFjrParseResults(
            fjrParseResCheckpoint)
        if errorSummary and newFjrParseResCheckpoint:
            parseErrorReport(errorSummary, nodes)
    except IOError:
        logging.exception("error during error_summary file handling")

    # collect all cache info in a single dictionary and return it to the caller
    newCacheDoc = {}
    newCacheDoc['jobLogCheckpoint'] = newJobLogCheckpoint
    newCacheDoc['fjrParseResCheckpoint'] = newFjrParseResCheckpoint
    newCacheDoc['nodes'] = nodes
    newCacheDoc['nodeMap'] = nodeMap
    return newCacheDoc
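# A minimal hedged sketch of the checkpoint pattern used above: the function
# relies on JobEventLog objects being picklable, so that an unpickled reader
# resumes from where it left off in the log instead of re-parsing events that
# were already seen. The helper names and file paths are illustrative.
import pickle
import htcondor

def load_or_create_reader(checkpoint_path, log_path):
    try:
        with open(checkpoint_path, 'rb') as f:
            return pickle.load(f)  # resumes at the saved log position
    except (OSError, pickle.UnpicklingError):
        return htcondor.JobEventLog(log_path)  # start from the beginning

def save_reader(jel, checkpoint_path):
    with open(checkpoint_path, 'wb') as f:
        pickle.dump(jel, f)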
def successful_job_log(successful_condor, successful_beneficiary_job,
                       successful_victim_jobs, test_dir):
    bID = str(successful_beneficiary_job.job_ids[0])
    vIDs = [str(vID) for vID in successful_victim_jobs.job_ids]

    rv = successful_condor.run_command(["condor_now", bID] + vIDs)
    assert rv.returncode == 0

    assert successful_beneficiary_job.wait(
        condition=ClusterState.all_running,
        timeout=60,
        fail_condition=ClusterState.any_held,
    )

    # This seems like something I should be able to get from the cluster handle.
    return htcondor.JobEventLog((test_dir / "cmd_now-success.log").as_posix())
def _read_events(self, timeout=0):
    if self._event_reader is None:
        self._event_reader = htcondor.JobEventLog(
            self._event_log_path.as_posix()).events(timeout)

    # iterate the persistent reader directly (it is already an event iterator)
    for event in self._event_reader:
        # skip the late materialization submit event
        if event.proc == -1:
            continue

        job_id = JobID(event.cluster, event.proc)

        if event.type is htcondor.JobEventType.SUBMIT:
            self._jobid_to_taskid[job_id] = uuid.UUID(
                classad.unquote(event["LogNotes"]))

        # this lookup is safe because the SUBMIT event always comes first
        task_id = self._jobid_to_taskid[job_id]
        task = self.executor.tasks[task_id]

        if event.type is htcondor.JobEventType.JOB_HELD:
            # TODO: turn this into an appropriate exception on the future
            raise Exception("job held")

        new_status = JOB_EVENT_STATUS_TRANSITIONS.get(event.type, None)
        if new_status is not None:
            if new_status is self._task_statuses[task_id]:
                logger.warning(
                    f"{task} of executor {self.executor} tried to transition into the state it is already in ({new_status})"
                )
            else:
                self._task_statuses[task_id] = new_status

                if new_status is TaskStatus.COMPLETED:
                    x = htio.load_objects(task.output_path)
                    status = next(x)
                    output = next(x)
                    print(f"{task} finished with {status}, {output}")
                    if status == "OK":
                        task.future.set_result(output)
                    elif status == "ERR":
                        task.future.set_exception(output)
                    else:
                        raise Exception(f"bad task status {status}")
def __init__(self, event_log_paths, batch_names):
    event_readers = {}
    for event_log_path in event_log_paths:
        try:
            reader = htcondor.JobEventLog(event_log_path).events(0)
            event_readers[event_log_path] = reader
        except (OSError, IOError) as e:
            warning(
                "Could not open event log at {} for reading, so it will be ignored. Reason: {}"
                .format(event_log_path, e))

    self.event_readers = event_readers
    self.state = collections.defaultdict(
        lambda: collections.defaultdict(dict))
    self.batch_names = batch_names
    self.cluster_id_to_cluster = {}
def _read_events(self):
    with self._event_reader_lock:  # no thread can be in here at the same time as another
        if self._event_reader is None:
            logger.debug(f"Created event log reader for map {self.map.tag}")
            self._event_reader = htcondor.JobEventLog(
                self._event_log_path.as_posix()).events(0)

        with utils.Timer() as timer:
            handled_events = self._handle_events()

        if handled_events > 0:
            logger.debug(
                f"Processed {handled_events} events for map {self.map.tag} (took {timer.elapsed:.6f} seconds)"
            )

            self.map._local_data = None  # invalidate cache if any events were received

            if utils.BINDINGS_VERSION_INFO >= (8, 9, 3):
                self.save()
def job_log(jobs, condor, test_dir):
    bID = str(jobs.state.by_name[JobStatus.IDLE][0])
    vIDs = [str(vID) for vID in jobs.state.by_name[JobStatus.RUNNING]]

    rv = condor.run_command(["condor_now", "--flags", "1", bID, *vIDs])
    assert rv.returncode == 0

    # Consider converting this into a wait() for these jobs to go idle, and
    # then assert ordering about the eviction event in jobs.event_log.events.
    jel = htcondor.JobEventLog(
        (test_dir / "condor_now_internals.log").as_posix())
    num_evicted = 0
    for e in jel.events(60):
        if e.type == htcondor.JobEventType.JOB_EVICTED and e.cluster == jobs.clusterid:
            num_evicted += 1
            if num_evicted == len(vIDs):
                break
    assert num_evicted == len(vIDs)

    return jel
def main(argv):
    jel = htcondor.JobEventLog(argv[1])
    if not jel.isInitialized():
        print("Failed to find job event log {0}".format(argv[1]))
        exit(1)

    if str(jel) != str(iter(jel)):
        print("jel != iter(jel)")
        exit(2)

    if str(jel) != str(jel.follow()):
        print("jel != jel.follow()")
        exit(3)

    if str(jel) != str(jel.follow(100)):
        print("jel != jel.follow(100)")
        exit(3)

    for event in jel.follow(1000):
        print("Found event of type {0}".format(event.type))
        if event.type != htcondor.JobEventType.NONE:
            print("... for job {0}".format(event.Cluster))
def test_status_no_error_no_parallel(self):
    sched = CondorScheduler()

    log_name = 'completed_no_error_no_parallel.submit.nodes.log'
    jel = htcondor.JobEventLog(
        os.path.join(os.path.dirname(__file__), 'data', log_name))
    events = list(jel.events(stop_after=0))

    with TemporaryDirectory() as td:
        submit_dir = os.path.join(td, 'job', 'submit')
        os.makedirs(submit_dir)
        fn = os.path.join(submit_dir, log_name)

        details = {
            'working_directory': td,
            'submit_directory': 'job/submit'
        }

        def write_next_event():
            with open(fn, "a") as f:
                f.write(str(events.pop(0)))
                f.write('...\n')

        write_next_event()
        self.assertEqual(sched.status(None, details),
                         (JobStatus.QUEUED, "Job is queued"))

        for _ in range(7):
            write_next_event()
            self.assertEqual(sched.status(None, details),
                             (JobStatus.RUNNING, "Job is running"))

        write_next_event()
        self.assertEqual(
            sched.status(None, details),
            (JobStatus.COMPLETED, "All job stages finished successfully"))
def test_status_error_short(self):
    sched = CondorScheduler()

    log_name = 'error_short.submit.nodes.log'
    jel = htcondor.JobEventLog(
        os.path.join(os.path.dirname(__file__), 'data', log_name))
    events = list(jel.events(stop_after=0))

    with TemporaryDirectory() as td:
        submit_dir = os.path.join(td, 'job', 'submit')
        os.makedirs(submit_dir)
        fn = os.path.join(submit_dir, log_name)

        details = {
            'working_directory': td,
            'submit_directory': 'job/submit'
        }

        def write_next_event():
            with open(fn, "a") as f:
                f.write(str(events.pop(0)))
                f.write('...\n')

        write_next_event()
        self.assertEqual(sched.status(None, details),
                         (JobStatus.QUEUED, "Job is queued"))

        write_next_event()
        self.assertEqual(sched.status(None, details),
                         (JobStatus.RUNNING, "Job is running"))

        write_next_event()
        self.assertEqual(
            sched.status(None, details),
            (JobStatus.ERROR, "Job terminated with return value 1"))
def _read_events(self):
    with self._event_reader_lock:  # no thread can be in here at the same time as another
        handled_events = False

        if self._event_reader is None:
            logger.debug(f'Created event log reader for map {self.map.tag}')
            self._event_reader = htcondor.JobEventLog(
                self._event_log_path.as_posix()).events(0)

        for event in self._event_reader:
            handled_events = True

            # skip the late materialization submit event
            if event.proc == -1:
                continue

            if event.type is htcondor.JobEventType.SUBMIT:
                self._jobid_to_component[(event.cluster, event.proc)] = int(
                    event['LogNotes'])

            # this lookup is safe because the SUBMIT event always comes first
            component = self._jobid_to_component[(event.cluster, event.proc)]

            if event.type is htcondor.JobEventType.IMAGE_SIZE:
                self._memory_usage[component] = max(
                    self._memory_usage[component],
                    int(event.get('MemoryUsage', 0)),
                )
            elif event.type is htcondor.JobEventType.JOB_TERMINATED:
                self._runtime[component] = parse_runtime(
                    event['RunRemoteUsage'])
            elif event.type is htcondor.JobEventType.JOB_RELEASED:
                self._holds.pop(component, None)
            elif event.type is htcondor.JobEventType.JOB_HELD:
                h = holds.ComponentHold(
                    code=int(event['HoldReasonCode']),
                    reason=event.get('HoldReason', 'UNKNOWN').strip(),
                )
                self._holds[component] = h

            new_status = JOB_EVENT_STATUS_TRANSITIONS.get(event.type, None)

            # the component has *terminated*, but did it error?
            if new_status is ComponentStatus.COMPLETED:
                try:
                    exec_status = self.map._peek_status(component)
                except exceptions.OutputNotFound:
                    logger.warning(
                        f'Output was not found for component {component} of map {self.map.tag}, marking it as errored'
                    )
                    exec_status = 'ERR'

                if exec_status == 'ERR':
                    new_status = ComponentStatus.ERRORED

            if new_status is not None:
                if new_status is self._component_statuses[component]:
                    logger.warning(
                        f'Component {component} of map {self.map.tag} tried to transition into the state it is already in ({new_status})'
                    )
                else:
                    # this log is commented out because it is very verbose,
                    # but it might be helpful when debugging:
                    # logger.debug(f'Component {component} of map {self.map.tag} changed state: {self._component_statuses[component]} -> {new_status}')
                    self._component_statuses[component] = new_status

        if handled_events:
            self.map._local_data = None  # invalidate cache if any events were received

            if utils.HTCONDOR_VERSION_INFO >= (8, 9, 3):
                self.save()
def storeNodesInfoInFile():
    """
    Open the cache file and get the location up to which the jobs_log was parsed last time.
    :return: nothing
    """
    jobLogCheckpoint = None
    try:
        if os.path.exists(STATUS_CACHE_FILE) and os.stat(STATUS_CACHE_FILE).st_size > 0:
            logging.debug("cache file found, opening and reading")
            nodesStorage = open(STATUS_CACHE_FILE, "r")

            jobLogCheckpoint = nodesStorage.readline().strip()
            fjrParseResCheckpoint = int(nodesStorage.readline())
            nodes = ast.literal_eval(nodesStorage.readline())
            nodeMap = ast.literal_eval(nodesStorage.readline())

            nodesStorage.close()
        else:
            logging.debug("cache file not found, creating")
            jobLogCheckpoint = None
            fjrParseResCheckpoint = 0
            nodes = {}
            nodeMap = {}
    except Exception:
        logging.exception("error during status_cache handling")

    if jobLogCheckpoint:
        # pickle files must be read in binary mode
        with open((LOG_PARSING_POINTERS_DIR + jobLogCheckpoint), 'rb') as f:
            jel = pickle.load(f)
    else:
        jel = htcondor.JobEventLog('job_log')
    #jobsLog = open("job_log", "r")
    #jobsLog.seek(jobLogCheckpoint)

    parseJobLog(jel, nodes, nodeMap)

    # save the jel object in a pickle file made unique by a timestamp
    newJelPickleName = 'jel-%d.pkl' % int(time.time())
    if not os.path.exists(LOG_PARSING_POINTERS_DIR):
        os.mkdir(LOG_PARSING_POINTERS_DIR)
    with open((LOG_PARSING_POINTERS_DIR + newJelPickleName), 'wb') as f:
        pickle.dump(jel, f)
    newJobLogCheckpoint = newJelPickleName

    for fn in glob.glob("node_state*"):
        level = re.match(r'(\w+)(?:\.(\w+))?', fn).group(2)
        with open(fn, 'r') as nodeState:
            parseNodeStateV2(nodeState, nodes, level)

    try:
        errorSummary, newFjrParseResCheckpoint = summarizeFjrParseResults(
            fjrParseResCheckpoint)
        if errorSummary and newFjrParseResCheckpoint:
            parseErrorReport(errorSummary, nodes)
    except IOError:
        logging.exception("error during error_summary file handling")

    # First write the new cache file under a temporary name, so that other processes
    # don't get an incomplete result. Then replace the old one with the new one.
    tempFilename = (STATUS_CACHE_FILE + ".%s") % os.getpid()
    nodesStorage = open(tempFilename, "w")

    nodesStorage.write(str(newJobLogCheckpoint) + "\n")
    nodesStorage.write(str(newFjrParseResCheckpoint) + "\n")
    nodesStorage.write(str(nodes) + "\n")
    nodesStorage.write(str(nodeMap) + "\n")

    nodesStorage.close()
    move(tempFilename, STATUS_CACHE_FILE)
def finished_inline_jobid(inline_dag_job, dag_dir):
    jel = htcondor.JobEventLog(str(dag_dir / "inline.dag.nodes.log"))
    for event in jel.events(0):
        if event.type == htcondor.JobEventType.SUBMIT:
            return JobID.from_job_event(event)
    assert False
def status(self, job_id, details):
    """
    Get the status of a job by scheduler id

    :param job_id: The scheduler job id to check the status of
    :param details: The internal job details object
    :return: A tuple of JobStatus and additional info as a string, or
        (None, None) if no job status could be obtained
    """
    p = Path(details['working_directory']) / details['submit_directory']
    print(f"Trying to get status of job with working directory {p}...")

    log_file = list(p.glob('*.submit.nodes.log'))
    if len(log_file) != 1:
        print(
            f"The number of .submit.nodes.log files was not 1 as expected, it was {len(log_file)}"
        )
        return None, None

    log_file = log_file[0]

    # Parse the event log with condor and get a reverse-chronological list of events
    jel = htcondor.JobEventLog(str(log_file))
    events = list(jel.events(stop_after=0))
    events.reverse()

    # Find the most recent submit event and parse its log notes to find which
    # job stage the submit was for
    submit_event = list(
        filter(lambda x: x.type == htcondor.JobEventType.SUBMIT, events))[0]
    notes = submit_event['LogNotes']
    stage = list(
        filter(lambda x: x.startswith("DAG Node:"), notes.splitlines()))

    # There should be exactly one stage found, which is the name of the job dag for the submitted job
    if len(stage) != 1:
        print("No DAG Node could be found for the most recent job submission")
        return None, None

    stage = stage[0]

    # Get the most recent event and determine the job state
    event = events[0]

    if event.type == htcondor.JobEventType.SUBMIT:
        # The only time a job can be queued is when the most recently submitted job
        # was the generation stage; otherwise SUBMIT indicates the job is running
        if stage.endswith('_generation_arg_0'):
            return JobStatus.QUEUED, "Job is queued"
        else:
            return JobStatus.RUNNING, "Job is running"

    if event.type == htcondor.JobEventType.EXECUTE:
        # EXECUTE is self-explanatory.
        return JobStatus.RUNNING, "Job is running"

    if event.type == htcondor.JobEventType.JOB_TERMINATED:
        # Jobs that terminate normally and have a return value of 0 completed
        # successfully; otherwise some error has occurred
        if event["TerminatedNormally"]:
            if event['ReturnValue'] != 0:
                return JobStatus.ERROR, f"Job terminated with return value {event['ReturnValue']}"

            # Completion can only be reported if the current job stage is plotting;
            # otherwise the job should continue in the running state
            if stage.endswith('_plot_arg_0'):
                return JobStatus.COMPLETED, "All job stages finished successfully"
            else:
                return JobStatus.RUNNING, "Job is running"
        else:
            # ???
            return JobStatus.ERROR, "Job terminated abnormally"

    # Bilby jobs may be evicted, which is ok. Bilby jobs which are evicted will
    # resubmit via signal and continue. Held/released jobs are also part of the
    # internal eviction/resubmit process
    if event.type in [
            htcondor.JobEventType.JOB_EVICTED,
            htcondor.JobEventType.JOB_HELD,
            htcondor.JobEventType.JOB_RELEASED
    ]:
        return JobStatus.RUNNING, "Job is running"

    # If the job has been aborted, it's probably been cancelled - mark it as such
    if event.type == htcondor.JobEventType.JOB_ABORTED:
        return JobStatus.CANCELLED, "Job has been aborted"

    print(
        f"Unexpected job event {event.type} for working directory {details['working_directory']}"
    )
    return None, None
def storeNodesInfoInFile():
    """
    Open the cache file and get the location up to which the jobs_log was parsed last time.
    Returns a dictionary with keys: jobLogCheckpoint, fjrParseResCheckpoint, nodes, nodeMap.
    """
    jobLogCheckpoint = None
    if os.path.exists(STATUS_CACHE_FILE) and os.stat(STATUS_CACHE_FILE).st_size > 0:
        logging.debug("cache file found, opening")
        try:
            nodesStorage = open(STATUS_CACHE_FILE, "r")

            jobLogCheckpoint = nodesStorage.readline().strip()
            if jobLogCheckpoint.startswith('#'):
                logging.debug("cache file contains initial comments, skipping")
                # a comment line indicates a placeholder file created at DAG bootstrap time
                jobLogCheckpoint = None
            else:
                logging.debug("reading cache file")
                fjrParseResCheckpoint = int(nodesStorage.readline())
                nodes = ast.literal_eval(nodesStorage.readline())
                nodeMap = ast.literal_eval(nodesStorage.readline())

            nodesStorage.close()
        except Exception:
            logging.exception("error during status_cache handling")
            jobLogCheckpoint = None

    if not jobLogCheckpoint:
        logging.debug("no usable cache file found, creating")
        fjrParseResCheckpoint = 0
        nodes = {}
        nodeMap = {}

    if jobLogCheckpoint:
        # resume log parsing where we left off; pickle files must be read in binary mode
        with open((LOG_PARSING_POINTERS_DIR + jobLogCheckpoint), 'rb') as f:
            jel = pickle.load(f)
    else:
        # parse the log from the beginning
        jel = htcondor.JobEventLog('job_log')
    #jobsLog = open("job_log", "r")
    #jobsLog.seek(jobLogCheckpoint)

    parseJobLog(jel, nodes, nodeMap)

    # save the jel object in a pickle file made unique by a timestamp
    newJelPickleName = 'jel-%d.pkl' % int(time.time())
    if not os.path.exists(LOG_PARSING_POINTERS_DIR):
        os.mkdir(LOG_PARSING_POINTERS_DIR)
    with open((LOG_PARSING_POINTERS_DIR + newJelPickleName), 'wb') as f:
        pickle.dump(jel, f)
    newJobLogCheckpoint = newJelPickleName

    for fn in glob.glob("node_state*"):
        level = re.match(r'(\w+)(?:\.(\w+))?', fn).group(2)
        with open(fn, 'r') as nodeState:
            parseNodeStateV2(nodeState, nodes, level)

    try:
        errorSummary, newFjrParseResCheckpoint = summarizeFjrParseResults(
            fjrParseResCheckpoint)
        if errorSummary and newFjrParseResCheckpoint:
            parseErrorReport(errorSummary, nodes)
    except IOError:
        logging.exception("error during error_summary file handling")

    # First write the new cache file under a temporary name, so that other processes
    # don't get an incomplete result. Then replace the old one with the new one.
    tempFilename = (STATUS_CACHE_FILE + ".%s") % os.getpid()
    nodesStorage = open(tempFilename, "w")

    nodesStorage.write(str(newJobLogCheckpoint) + "\n")
    nodesStorage.write(str(newFjrParseResCheckpoint) + "\n")
    nodesStorage.write(str(nodes) + "\n")
    nodesStorage.write(str(nodeMap) + "\n")

    nodesStorage.close()
    move(tempFilename, STATUS_CACHE_FILE)

    # collect all cache info in a single dictionary and return it to the caller
    cacheDoc = {}
    cacheDoc['jobLogCheckpoint'] = newJobLogCheckpoint
    cacheDoc['fjrParseResCheckpoint'] = newFjrParseResCheckpoint
    cacheDoc['nodes'] = nodes
    cacheDoc['nodeMap'] = nodeMap
    return cacheDoc
def synthetic(logfile):
    new = ""
    jel = htcondor.JobEventLog(logfile)
    for event in jel.events(stop_after=0):
        new = new + str(event) + "...\n"
    return new
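# A hedged usage sketch for synthetic() above: str(event) renders an event in
# the user-log text format without the "...\n" record separator (the same
# convention the write_next_event() helpers above rely on), so the rebuilt
# text should itself parse back into the same events. The filenames and the
# round-trip assertion are illustrative assumptions.
def test_synthetic_round_trip():
    first_pass = synthetic("original.log")
    with open("synthetic.log", mode="w") as f:
        f.write(first_pass)
    # re-rendering the synthetic log should be a fixed point
    assert synthetic("synthetic.log") == first_pass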