Example #1
    def run(self):
        # os.close gets called in the finally block below, which is sometimes
        # executed while the interpreter is already shutting down.
        # At that point the interpreter may have cleaned up the module globals,
        # so "os" resolves to None and the call raises an AttributeError.
        # Thus we keep our own reference to this function.
        # (This should no longer happen since this is not a daemon thread anymore,
        # but it does not hurt either.)
        close = os.close
        try:
            # In an eventfd, there are always 8 bytes for the event number.
            # We just do a blocking read to wait for the event.
            _ = os.read(self._efd, 8)
            # If read returned, this means the kernel sent us an event.
            # It does so either on OOM or if the cgroup is removed.
            if not self._finished.is_set():
                self._callback("memory")
                logging.debug(
                    "Killing process %s due to out-of-memory event from kernel.",
                    self._pid_to_kill,
                )
                util.kill_process(self._pid_to_kill)
                # Also kill all children of subprocesses directly.
                with open(os.path.join(self._cgroups[MEMORY], "tasks"), "rt") as tasks:
                    for task in tasks:
                        util.kill_process(int(task))

                # We now need to increase the memory limit of this cgroup
                # to give the process a chance to terminate
                self._reset_memory_limit("memory.memsw.limit_in_bytes")
                self._reset_memory_limit("memory.limit_in_bytes")

        finally:
            close(self._efd)
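
The run() method above only reads from self._efd; it assumes that the eventfd was already registered for OOM notifications on the memory cgroup. A minimal sketch of that registration for cgroup v1 (an illustration, not the project's actual setup code; register_oom_eventfd is a hypothetical helper and os.eventfd requires Python 3.10+):

import os

def register_oom_eventfd(memory_cgroup):
    # Hypothetical setup helper; the project's real registration code is not shown here.
    # Eventfd that the kernel signals with an 8-byte counter on each event.
    efd = os.eventfd(0)
    oom_control = os.open(
        os.path.join(memory_cgroup, "memory.oom_control"), os.O_RDONLY
    )
    try:
        # cgroup v1 registers event listeners by writing
        # "<eventfd> <fd of the control file>" to cgroup.event_control.
        with open(os.path.join(memory_cgroup, "cgroup.event_control"), "wt") as f:
            f.write(f"{efd} {oom_control}")
    finally:
        # The kernel keeps its own reference once the registration is done.
        os.close(oom_control)
    return efd
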
Example #2
def kill_all_tasks_in_cgroup(cgroup, ensure_empty=True):
    tasksFile = os.path.join(cgroup, "tasks")

    i = 0
    while True:
        i += 1
        # TODO We can probably remove this loop over signals and just send
        # SIGKILL. We added this loop when killing sub-processes was not reliable
        # and we did not know why, but now it is reliable.
        for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]:
            with open(tasksFile, "rt") as tasks:
                task = None
                for task in tasks:
                    task = task.strip()
                    if i > 1:
                        logging.warning(
                            "Run has left-over process with pid %s "
                            "in cgroup %s, sending signal %s (try %s).",
                            task,
                            cgroup,
                            sig,
                            i,
                        )
                    util.kill_process(int(task), sig)

                if task is None or not ensure_empty:
                    return  # no task left over, or caller does not require an empty cgroup
            # Wait for the processes to exit; this might take some time.
            time.sleep(i * 0.5)
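
All of these snippets delegate the actual signalling to util.kill_process, whose implementation is not shown. A plausible minimal version (an assumption, not the project's code) wraps os.kill and treats a process that already exited as a non-error:

import os
import signal

def kill_process(pid, sig=signal.SIGKILL):
    # Assumed stand-in for util.kill_process; not the project's actual implementation.
    # Send the signal; ignore processes that terminated in the meantime.
    # Other OSErrors (e.g. permission problems) propagate to the caller,
    # which is why Example #3 guards the call with try/except.
    try:
        os.kill(pid, sig)
    except ProcessLookupError:
        pass
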
Example #3
def stop(self):
    self.PROCESS_KILLED = True
    with self.SUB_PROCESS_PIDS_LOCK:
        for pid in self.SUB_PROCESS_PIDS:
            logging.warning("Killing process %s forcefully.", pid)
            try:
                util.kill_process(pid)
            except EnvironmentError as e:
                # May fail due to race conditions
                logging.debug(e)
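
stop() can only terminate what it knows about, so every spawned subprocess needs to register its PID in SUB_PROCESS_PIDS while it runs. A sketch of that bookkeeping (hypothetical; it assumes SUB_PROCESS_PIDS is a set belonging to the same class as stop() above):

import subprocess

def _run_subprocess(self, args):
    # Hypothetical method on the same class as stop(); not from the original code.
    process = subprocess.Popen(args)
    with self.SUB_PROCESS_PIDS_LOCK:
        self.SUB_PROCESS_PIDS.add(process.pid)
    try:
        return process.wait()
    finally:
        # Unregister the PID even if wait() raises, so that stop() does not
        # try to kill a process that is already gone.
        with self.SUB_PROCESS_PIDS_LOCK:
            self.SUB_PROCESS_PIDS.discard(process.pid)
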
Example #4
def _check_limit(self, files_count, files_size):
    if self._files_count_limit and files_count > self._files_count_limit:
        reason = "files-count"
    elif self._files_size_limit and files_size > self._files_size_limit:
        reason = "files-size"
    else:
        return None
    self._callback(reason)
    logging.debug(
        "Killing process %d due to %s limit (%d files with %d bytes).",
        self._pid_to_kill,
        reason,
        files_count,
        files_size,
    )
    util.kill_process(self._pid_to_kill)
    return reason
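
_check_limit expects its caller to have already counted the watched files and summed their sizes. A minimal sketch of such a measurement over an output directory (measure_files is a hypothetical helper, not part of the excerpt):

import os

def measure_files(path):
    # Hypothetical helper; the real scanning logic is not part of the excerpt.
    files_count = 0
    files_size = 0
    for dirpath, _dirnames, filenames in os.walk(path):
        for name in filenames:
            files_count += 1
            try:
                files_size += os.path.getsize(os.path.join(dirpath, name))
            except OSError:
                pass  # the file may disappear while we are scanning
    return files_count, files_size

A caller could then periodically run self._check_limit(*measure_files(output_dir)) and stop monitoring once a non-None reason is returned.
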
Example #5
def kill_all_tasks_in_cgroup(cgroup):
    tasksFile = os.path.join(cgroup, "tasks")
    freezer_file = os.path.join(cgroup, "freezer.state")

    def try_write_to_freezer(content):
        try:
            util.write_file(content, freezer_file)
        except IOError:
            pass  # expected if the freezer is not enabled; we then try killing without it

    i = 0
    while True:
        i += 1
        # TODO We can probably remove this loop over signals and just send
        # SIGKILL. We added this loop when killing sub-processes was not reliable
        # and we did not know why, but now it is reliable.
        for sig in [signal.SIGKILL, signal.SIGINT, signal.SIGTERM]:
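            # Freeze the cgroup first so that its tasks cannot fork new children
            # while we iterate over the tasks file; the signals can then take
            # effect once the cgroup is thawed again below.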
            try_write_to_freezer("FROZEN")
            with open(tasksFile, "rt") as tasks:
                task = None
                for task in tasks:
                    task = task.strip()
                    if i > 1:
                        logging.warning(
                            "Run has left-over process with pid %s "
                            "in cgroup %s, sending signal %s (try %s).",
                            task,
                            cgroup,
                            sig,
                            i,
                        )
                    util.kill_process(int(task), sig)

                if task is None:
                    return  # No process was hanging, exit
            try_write_to_freezer("THAWED")
            # Wait for the processes to exit; this might take some time.
            time.sleep(i * 0.5)
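
try_write_to_freezer relies on util.write_file, which is not part of the excerpt. A plausible minimal version (an assumption, not the project's code):

import os

def write_file(content, *path):
    # Assumed stand-in for util.write_file; not the project's actual implementation.
    # Join the path components and overwrite the file with the given content.
    with open(os.path.join(*path), "wt") as target:
        target.write(content)

With this, write_file("FROZEN", freezer_file) simply overwrites freezer.state, and the IOError handler above covers systems where the freezer controller is not available for this cgroup.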