def store_state(self):
    """Write ``self.data`` to the state file from a forked child process.

    When a write is already in flight (``self.write_pid`` is set) or writing
    is disabled, nothing is written now. Instead a single call to
    ``self.delay_store`` is scheduled after ``STATE_SLEEP``, unless one is
    already pending (``self.store_delayed``).

    Otherwise the process forks. The child dumps the data as YAML into a
    temporary file in ``self.working_dir``, moves it over the real state
    file and exits. The parent remembers the child's pid and schedules
    ``self.check_write_child``.
    """
    log.debug("store_state called: %r, %r", self.write_pid, self.writing_enabled)

    can_write_now = not self.write_pid and self.writing_enabled
    if not can_write_now:
        # A child is still writing (or writing is off). Don't lose this
        # change: queue exactly one retry for later.
        if self.store_delayed:
            return
        self.store_delayed = True
        reactor.callLater(STATE_SLEEP, self.delay_store)
        return

    staging_path = os.path.join(self.working_dir, '.tmp.' + STATE_FILE)
    target_path = os.path.join(self.working_dir, STATE_FILE)
    log.info("Storing state in %s", target_path)

    self.write_start = timeutils.current_timestamp()
    child_pid = os.fork()

    if child_pid == 0:
        # Child: assume failure until the file has been moved into place.
        exit_code = os.EX_SOFTWARE
        try:
            with open(staging_path, 'w') as staging_file:
                yaml.dump(
                    self.data,
                    staging_file,
                    default_flow_style=False,
                    indent=4,
                )
            shutil.move(staging_path, target_path)
            exit_code = os.EX_OK
        except:
            # Deliberately broad: the finally clause below always calls
            # os._exit, so nothing can propagate out of the child anyway.
            log.exception("Failure while writing state")
        finally:
            os._exit(exit_code)
    else:
        # Parent: track the writer and poll for its completion later.
        self.write_pid = child_pid
        reactor.callLater(STATE_SLEEP, self.check_write_child)
def check_write_child(self):
    """Poll the forked state-writing child and reschedule until it exits.

    Does nothing when no write is in progress (``self.write_pid`` is falsy).
    Otherwise performs a non-blocking ``os.waitpid`` on the child:

    * if the child has exited, logs how long the write took, warns when the
      wait status is non-zero, and clears ``write_pid`` / ``write_start``;
    * if it is still running, warns when the write has been going for more
      than ``WRITE_DURATION_WARNING_SECS`` and schedules another check after
      ``STATE_SLEEP``.
    """
    if not self.write_pid:
        return

    # With WNOHANG, waitpid reports pid 0 while the child is still running.
    pid, status = os.waitpid(self.write_pid, os.WNOHANG)

    if pid != 0:
        log.info(
            "State writing completed in %d seconds",
            timeutils.current_timestamp() - self.write_start,
        )
        # status is the raw wait status as returned by os.waitpid.
        if status != 0:
            log.warning("State writing process failed with status %d", status)
        self.write_pid = None
        self.write_start = None
        return

    # Process hasn't exited
    write_duration = timeutils.current_timestamp() - self.write_start
    if write_duration > WRITE_DURATION_WARNING_SECS:
        log.warning("State writing hasn't completed in %d secs", write_duration)
    reactor.callLater(STATE_SLEEP, self.check_write_child)
def state_data(self):
    """Build a snapshot dict of the state of every job and service.

    The returned dict has four keys:

    * ``version``: ``tron.__version_info__``
    * ``create_time``: the current timestamp
    * ``jobs``: job name -> that job's ``state_data``
    * ``services``: service name -> that service's ``state_data``
    """
    snapshot = {
        'version': tron.__version_info__,
        'create_time': timeutils.current_timestamp(),
    }
    snapshot['jobs'] = {
        job_name: scheduler.job.state_data
        for job_name, scheduler in self.mcp.jobs.iteritems()
    }
    snapshot['services'] = {
        service.name: service.state_data
        for service in self.mcp.services.itervalues()
    }
    return snapshot
def handle_action_run_state_change(self, action_run: ActionRun, event):
    """Handle an event emitted by one of this run's action runs.

    Every event is logged and metered as ``tron.actionrun.<event>``.

    ``ActionRun.NOTIFY_TRIGGER_READY`` is handled on its own: if ``run_time``
    has not been reached yet nothing happens, otherwise action runs are
    started. This path never sends ``NOTIFY_STATE_CHANGED``.

    Any other event is first propagated as ``NOTIFY_STATE_CHANGED``. Then,
    once ``action_run`` is done, the method tries to make progress: start
    further action runs, wait for active/scheduled ones, and finally either
    start the cleanup action run or finalize.

    Args:
        action_run: the action run that emitted ``event``.
        event: the event; compared against ``ActionRun.NOTIFY_TRIGGER_READY``.

    Returns:
        The result of ``self.finalize()`` when there is no cleanup run left
        to execute; ``None`` on every other path.
    """
    log.info(f"{self} got an event: {event}")
    metrics.meter(f'tron.actionrun.{event}')

    if event == ActionRun.NOTIFY_TRIGGER_READY:
        # Triggers may be satisfied before the scheduled time; hold off.
        if timeutils.current_timestamp() < self.run_time.timestamp():
            log.info(f"{self} triggers are satisfied but not run_time yet")
            return

        started = self._start_action_runs()
        if any(started):
            log.info(
                f"{self} action runs triggered: "
                f"{', '.join(str(s) for s in started)}"
            )
        # NOTE(review): placed so the trigger-ready path always stops here,
        # whether or not anything was started; confirm against upstream.
        return

    # propagate all state changes (from action runs) up to state serializer
    self.notify(self.NOTIFY_STATE_CHANGED)

    # Nothing further to decide until this action run is done.
    if not action_run.is_done:
        return

    # A skipped action with other runs still scheduled needs no follow-up.
    if action_run.is_skipped and self.action_runs.is_scheduled:
        return

    # Unless this action run is broken, try to start more action runs.
    if not action_run.is_broken:
        started = self._start_action_runs()
        if any(started):
            log.info(
                f"{self} action runs started: "
                f"{', '.join(str(s) for s in started)}"
            )
            return

    if self.action_runs.is_active or self.action_runs.is_scheduled:
        log.info(f"{self} still has running or scheduled actions")
        return

    # If we can't make any progress, we're done
    cleanup_run: ActionRun = self.action_runs.cleanup_action_run
    # No cleanup run, or it has already finished: finalize right away.
    if not cleanup_run or cleanup_run.is_done:
        return self.finalize()

    cleanup_run.start()
def exited(self, exit_status):
    """Record the end time and exit status, then fire the "exit" transition.

    The attributes are written unconditionally, before the state machine is
    asked to transition.
    """
    finished_at = timeutils.current_timestamp()
    self.end_time = finished_at
    self.exit_status = exit_status
    self.machine.transition("exit")
def started(self):
    """Record the start time, then fire the "start" transition.

    The start time is written unconditionally, before the state machine is
    asked to transition.
    """
    began_at = timeutils.current_timestamp()
    self.start_time = began_at
    self.machine.transition("start")
def exited(self, exit_status):
    """Record exit details and move the state machine through 'exit'.

    Returns ``False`` without touching any attribute when
    ``self.machine.check('exit')`` is falsy. Otherwise sets ``end_time`` and
    ``exit_status`` and returns the result of
    ``self.machine.transition('exit')``.
    """
    if self.machine.check('exit'):
        self.end_time = timeutils.current_timestamp()
        self.exit_status = exit_status
        return self.machine.transition('exit')
    return False
def started(self):
    """Record the start time and move the state machine through 'start'.

    Returns ``False`` without touching any attribute when
    ``self.machine.check('start')`` is falsy. Otherwise sets ``start_time``
    and returns the result of ``self.machine.transition('start')``.
    """
    if self.machine.check('start'):
        self.start_time = timeutils.current_timestamp()
        return self.machine.transition('start')
    return False
def exited(self, exit_status):
    """Record exit details, then transition through 'exit' and notify.

    Returns ``None`` without touching any attribute when
    ``self.machine.check('exit')`` is falsy. Otherwise sets ``end_time`` and
    ``exit_status`` and returns the result of
    ``self.transition_and_notify('exit')``.
    """
    if not self.machine.check('exit'):
        return None

    self.end_time = timeutils.current_timestamp()
    self.exit_status = exit_status
    return self.transition_and_notify('exit')
def started(self):
    """Record the start time, then transition through 'start' and notify.

    Returns ``None`` without touching any attribute when
    ``self.machine.check('start')`` is falsy. Otherwise sets ``start_time``
    and returns the result of ``self.transition_and_notify('start')``.
    """
    if not self.machine.check('start'):
        return None

    self.start_time = timeutils.current_timestamp()
    return self.transition_and_notify('start')