def run(self):
    # os.close gets called in finally,
    # which sometimes is executed while the process is shutting down already.
    # It happens that the Python interpreter has already cleaned up at this point
    # and "os" resolves to None, leading to an AttributeError.
    # Thus we keep our own reference to this function.
    # (Should not happen anymore since this is no longer a daemon thread,
    # but should not hurt anyway.)
    close = os.close
    try:
        # In an eventfd, there are always 8 bytes for the event number.
        # We just do a blocking read to wait for the event.
        _ = os.read(self._efd, 8)
        # If read returned, this means the kernel sent us an event.
        # It does so either on OOM or if the cgroup is removed.
        if not self._finished.is_set():
            self._callback("memory")
            logging.debug(
                "Killing process %s due to out-of-memory event from kernel.",
                self._pid_to_kill,
            )
            util.kill_process(self._pid_to_kill)
            # Also kill all children of subprocesses directly.
            with open(os.path.join(self._cgroups[MEMORY], "tasks"), "rt") as tasks:
                for task in tasks:
                    util.kill_process(int(task))
            # We now need to increase the memory limit of this cgroup
            # to give the process a chance to terminate.
            self._reset_memory_limit("memory.memsw.limit_in_bytes")
            self._reset_memory_limit("memory.limit_in_bytes")
    finally:
        close(self._efd)
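# A hedged sketch (not part of the original module) of how the eventfd read
# above could have been registered for OOM notifications on a cgroup-v1
# memory controller: the kernel's documented interface is to write
# "<event_fd> <control_fd>" to cgroup.event_control. Requires Python >= 3.10
# for os.eventfd(); the function name is an illustrative assumption.
import os


def register_oom_eventfd(cgroup_memory_path):
    """Return an eventfd that becomes readable when the cgroup hits OOM."""
    efd = os.eventfd(0)  # kernel writes an 8-byte counter here per event
    ofd = os.open(
        os.path.join(cgroup_memory_path, "memory.oom_control"), os.O_WRONLY
    )
    try:
        with open(os.path.join(cgroup_memory_path, "cgroup.event_control"), "w") as f:
            f.write(f"{efd} {ofd}")
    finally:
        os.close(ofd)  # registration persists; the control fd can be closed
    return efd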
def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True):
    tasksFile = os.path.join(cgroup, "tasks")

    i = 0
    while True:
        i += 1
        # TODO We can probably remove this loop over signals and just send
        # SIGKILL. We added this loop when killing sub-processes was not reliable
        # and we did not know why, but now it is reliable.
        for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]:
            with open(tasksFile, "rt") as tasks:
                task = None
                for task in tasks:
                    task = task.strip()
                    if i > 1:
                        logging.warning(
                            "Run has left-over process with pid %s "
                            "in cgroup %s, sending signal %s (try %s).",
                            task,
                            cgroup,
                            sig,
                            i,
                        )
                    util.kill_process(int(task), sig)

            if task is None or not ensure_empty:
                return  # No process was hanging, exit

            # wait for the process to exit, this might take some time
            time.sleep(i * 0.5)
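# util.kill_process() is used throughout this section but not shown here.
# A minimal stand-in consistent with its call sites might look like the
# following; the real helper may differ in details.
import logging
import os
import signal


def kill_process(pid, sig=signal.SIGKILL):
    """Send sig to pid, tolerating processes that have already exited."""
    try:
        os.kill(pid, sig)
    except ProcessLookupError:
        pass  # process already gone, nothing to do
    except OSError as e:
        logging.warning(
            "Failed to send signal %s to process %s: %s", sig, pid, e.strerror
        )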
def stop(self):
    self.PROCESS_KILLED = True
    with self.SUB_PROCESS_PIDS_LOCK:
        for pid in self.SUB_PROCESS_PIDS:
            logging.warning("Killing process %s forcefully.", pid)
            try:
                util.kill_process(pid)
            except EnvironmentError as e:
                # May fail due to race conditions
                logging.debug(e)
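# Hedged sketch of the bookkeeping that stop() above relies on: child PIDs
# must be registered under the same lock before stop() can find and kill
# them. Class and method names are illustrative, not taken from the
# original code.
import threading


class SubprocessTracker:
    def __init__(self):
        self.PROCESS_KILLED = False
        self.SUB_PROCESS_PIDS = set()
        self.SUB_PROCESS_PIDS_LOCK = threading.Lock()

    def register(self, pid):
        """Remember a child PID so a later stop() can kill it."""
        with self.SUB_PROCESS_PIDS_LOCK:
            self.SUB_PROCESS_PIDS.add(pid)

    def unregister(self, pid):
        """Forget a PID once the child has been reaped."""
        with self.SUB_PROCESS_PIDS_LOCK:
            self.SUB_PROCESS_PIDS.discard(pid)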
def _check_limit(self, files_count, files_size):
    if self._files_count_limit and files_count > self._files_count_limit:
        reason = "files-count"
    elif self._files_size_limit and files_size > self._files_size_limit:
        reason = "files-size"
    else:
        return None

    self._callback(reason)
    logging.debug(
        "Killing process %d due to %s limit (%d files with %d bytes).",
        self._pid_to_kill,
        reason,
        files_count,
        files_size,
    )
    util.kill_process(self._pid_to_kill)
    return reason
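# Hedged sketch of how a caller might compute the files_count and files_size
# values passed to _check_limit(): walk the run's output directory and total
# up files and bytes. The function name and its argument are illustrative
# assumptions, not part of the original code.
import os


def measure_output_files(output_dir):
    files_count = 0
    files_size = 0
    for dirpath, _dirnames, filenames in os.walk(output_dir):
        for name in filenames:
            try:
                files_size += os.path.getsize(os.path.join(dirpath, name))
                files_count += 1
            except OSError:
                pass  # file may vanish while we scan; ignore such races
    return files_count, files_size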
def kill_all_tasks_in_cgroup(cgroup):
    tasksFile = os.path.join(cgroup, "tasks")
    freezer_file = os.path.join(cgroup, "freezer.state")

    def try_write_to_freezer(content):
        try:
            util.write_file(content, freezer_file)
        except IOError:
            pass  # expected if freezer not enabled, we try killing without it

    i = 0
    while True:
        i += 1
        # TODO We can probably remove this loop over signals and just send
        # SIGKILL. We added this loop when killing sub-processes was not reliable
        # and we did not know why, but now it is reliable.
        for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]:
            try_write_to_freezer("FROZEN")
            with open(tasksFile, "rt") as tasks:
                task = None
                for task in tasks:
                    task = task.strip()
                    if i > 1:
                        logging.warning(
                            "Run has left-over process with pid %s "
                            "in cgroup %s, sending signal %s (try %s).",
                            task,
                            cgroup,
                            sig,
                            i,
                        )
                    util.kill_process(int(task), sig)

                if task is None:
                    return  # No process was hanging, exit

            try_write_to_freezer("THAWED")

            # wait for the process to exit, this might take some time
            time.sleep(i * 0.5)
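# The freezer trick above, shown in isolation as a hedged sketch: freezing
# the cgroup stops all member tasks, so none can fork new children between
# listing the tasks file and delivering the signal. Under cgroup v1, frozen
# tasks do not act even on SIGKILL until thawed, hence the final THAWED
# write. Function name and paths are illustrative assumptions.
import os
import signal


def freeze_kill_thaw(cgroup):
    state_file = os.path.join(cgroup, "freezer.state")
    with open(state_file, "w") as f:
        f.write("FROZEN")  # atomically stop every task in the cgroup
    with open(os.path.join(cgroup, "tasks"), "rt") as tasks:
        pids = [int(line) for line in tasks]
    for pid in pids:
        os.kill(pid, signal.SIGKILL)  # queued while the task is frozen
    with open(state_file, "w") as f:
        f.write("THAWED")  # tasks resume and the pending SIGKILL takes effect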