def get_this_psutil_process():
    """Return a ``psutil.Process`` for the current process, or ``None``.

    A freshly queried object is produced on every call.  ``None`` is
    returned when psutil cannot be imported, or when psutil refuses access
    to this process's full memory information.
    """
    try:
        from psutil import AccessDenied, Process

        # Probe the handle once so callers never get one that cannot report
        # memory (why would we be denied access to our own process?).
        try:
            handle = Process()
            handle.memory_full_info()
        except AccessDenied:  # pragma: no cover
            return None
        return handle
    except ImportError:  # pragma: no cover
        return None
def get_this_psutil_process():
    """Return a fresh ``psutil.Process`` for this process, or ``None``.

    ``None`` means psutil is not importable or it denied access to our own
    memory information.
    """
    # psutil is imported lazily, only when this function runs: who knows
    # what it imports (psutil imports subprocess, which on Python 3 imports
    # selectors, and that can expose issues with monkey-patching).
    proc = None
    try:
        from psutil import AccessDenied, Process

        # Make sure the handle works (why would we be denied access to our
        # own process?) before handing it out.
        try:
            candidate = Process()
            candidate.memory_full_info()
        except AccessDenied:  # pragma: no cover
            pass
        else:
            proc = candidate
    except ImportError:
        pass
    return proc
def get_memory(self, process: Process) -> Bytes:
    """Get the process memory.

    Reads the first field of ``process.memory_full_info()``, keeps
    ``self._max_mem`` at the largest reading seen so far, and returns the
    current reading.  Returns 0 when the process no longer exists.
    """
    try:
        current = process.memory_full_info()[0]
        previous_peak = self._max_mem
        # Keep the running peak; ties resolve to the new reading.
        self._max_mem = previous_peak if previous_peak > current else current
        return current
    except NoSuchProcess:
        return 0
def _get_memory_usage(process: psutil.Process): # memory_usage = process.memory_full_info().uss try: memory_usage = process.memory_full_info().uss except psutil.AccessDenied: memory_usage = 0 return memory_usage
def _get_process(self): # pylint:disable=method-hidden try: # The standard library 'resource' module doesn't provide # a standard way to get the RSS measure, only the maximum. # You might be tempted to try to compute something by adding # together text and data sizes, but on many systems those come back # zero. So our only option is psutil. from psutil import Process, AccessDenied # Make sure it works (why would we be denied access to our own process?) try: proc = Process() proc.memory_full_info() except AccessDenied: # pragma: no cover proc = None except ImportError: proc = None self._get_process = lambda: proc return proc
def track_memory_usage(out_path: str, write_frequency: float):
    """Track how busy the head node is

    Runs forever: every ``write_frequency`` seconds one JSON object is
    appended to ``out_path`` holding the timestamp, the CPU percent and PSS
    memory of this process (``thinker_*``), and the same two measures summed
    over every process owned by the current user (``all_*``).

    Args:
        out_path: Path to the output file
        write_frequency: How often to write (s)
    """
    # psutil computes cpu_percent() relative to the previous call on the
    # *same* Process object, so the handle must outlive the loop; creating a
    # new Process() per iteration would report 0.0 every time.  The very
    # first sample is still 0.0 because there is no earlier reference point.
    this_proc = Process()

    # The user name is loop-invariant; look it up once.
    my_name = getuser()

    while True:
        # Get a timestamp
        ts = datetime.now().timestamp()

        # Measure the thinker process
        my_usage = this_proc.cpu_percent()
        my_memory = this_proc.memory_full_info().pss

        # Measure all processes from my user
        all_cpu = all_memory = 0
        for other in process_iter():
            try:
                if other.username() != my_name:
                    continue
                all_cpu += other.cpu_percent()
                all_memory += other.memory_full_info().pss
            except Exception:
                # Best effort: processes may exit or be inaccessible while
                # we look at them.  Unlike a bare ``except`` this still lets
                # KeyboardInterrupt/SystemExit stop the tracker.
                continue

        with open(out_path, 'a') as fp:
            print(json.dumps({
                'time': ts,
                'thinker_cpu': my_usage,
                'thinker_mem': my_memory,
                'all_cpu': all_cpu,
                'all_mem': all_memory
            }), file=fp)
        time.sleep(write_frequency)
def get_proc_data(proc: psutil.Process) -> Dict[str, Union[int, float]]:
    """Collect memory and CPU figures for ``proc`` into one flat dict.

    The result holds every field of ``proc.memory_full_info()``, the value
    of ``proc.cpu_percent()`` under ``"cpu_percent"``, and
    ``"<field>_percent"`` entries for rss, pss, uss and vms expressed as a
    percentage of ``TOTAL_MEMORY``.

    Raises:
        ValueError: if the process no longer exists.
    """
    stats: Dict[str, Union[int, float]] = {}
    try:
        with proc.oneshot():
            stats.update(proc.memory_full_info()._asdict())
            stats["cpu_percent"] = proc.cpu_percent()
            # Order matters for the resulting key order: rss, pss, uss, vms.
            for field in ("rss", "pss", "uss", "vms"):
                stats[field + "_percent"] = stats[field] / TOTAL_MEMORY * 100
    except psutil.NoSuchProcess:
        raise ValueError(f"The process no longer exists: {proc.pid}")
    return stats
class TimeAndMemoryTracker:
    """Report the current process's RSS and elapsed time against a baseline.

    ``mem0`` is the memory baseline and ``t0`` the time baseline; both can
    be supplied by the caller or taken at construction time.
    """

    def __init__(self, initial_mem=None, initial_time=None):
        """Set up the baselines.

        initial_mem: baseline memory; when ``None`` the current RSS is used.
        initial_time: baseline time; when ``None`` the current ``time()``
            is used.
        """
        # Always keep a process handle: current_relative_info() reads it
        # even when the caller supplied the memory baseline.  Creating it
        # only in the ``initial_mem is None`` branch left ``self.proc``
        # undefined and made current_relative_info() raise AttributeError.
        self.proc = Process(getpid())

        if initial_mem is None:
            self.mem0 = self.proc.memory_full_info().rss
        else:
            self.mem0 = initial_mem

        if initial_time is None:
            self.t0 = time()
        else:
            self.t0 = initial_time

    def current_relative_info(self, event_name):
        """Print current RSS relative to ``mem0`` and time since ``t0``."""
        print('\t', event_name,
              '\t(current memory: ',
              self.proc.memory_full_info().rss / self.mem0, ')',
              '\t(time elapsed: ', time() - self.t0, ')')

    def update_memory(self, new_mem):
        """Replace the memory baseline."""
        self.mem0 = new_mem

    def update_time(self, new_time):
        """Replace the time baseline."""
        self.t0 = new_time
class SupervisorProcess(object):
    """State and resource samples for one supervisor-managed process.

    While the process is in the ``STOPPED`` state ``pid``, ``cpu_stats`` and
    ``process`` are all ``None``.  ``stats`` accumulates
    ``[timestamp, user_cpu_util, uss_memory]`` rows from :meth:`sample`.
    """

    def __init__(self, name, group, pid, state, statename, start, **kwargs):
        self.name = name
        self.group = group
        if statename == 'STOPPED':
            self._detach()
        else:
            self._attach(int(pid))
        self.state = state
        self.statename = statename
        self.start = start
        self.stats = []

    @staticmethod
    def _open_process(pid):
        """Return a psutil handle for ``pid``, or None if it does not exist."""
        try:
            return Process(pid)
        except NoSuchProcess:
            return None

    def _attach(self, pid):
        """Start tracking the process with integer ``pid``."""
        self.pid = pid
        self.cpu_stats = CPUStats(pid)
        self.process = self._open_process(pid)

    def _detach(self):
        """Forget the tracked process (used while it is stopped)."""
        self.pid = None
        self.cpu_stats = None
        self.process = None

    def update(self, pid, statename, start, **kwargs):
        """Refresh pid/state/start from a new supervisor status report."""
        if statename == 'STOPPED':
            self._detach()
        elif pid != self.pid:
            # Normalise before deciding the pid changed: the stored pid is
            # an int, so a string pid would otherwise always compare unequal
            # and reset the CPU statistics on every update.
            new_pid = int(pid)
            if new_pid != self.pid:
                self._attach(new_pid)
        self.statename = statename
        self.state = STATE_MAP[statename]
        self.start = start

    def sample(self):
        """Append one ``[timestamp, user_cpu_util, uss_memory]`` row."""
        timestamp = time()
        if self.cpu_stats:
            user_util, _sys_util = self.cpu_stats.cpu_percent_change()
        else:
            user_util = 0.0

        # Retry attaching only when there is a real pid.  Process(None)
        # resolves to the *current* process, which would record this
        # monitor's own memory against a stopped process.
        if not self.process and self.pid is not None:
            self.process = self._open_process(self.pid)

        memory = 0
        if self.process:
            try:
                # http://www.pybloggers.com/psutil-4-0-0-and-how-to-get-real-process-memory-and-environ-in-python/
                memory = self.process.memory_full_info().uss
            except NoSuchProcess:
                memory = 0
        self.stats.append([timestamp, user_util, memory])

    def reset(self):
        """Drop all collected samples."""
        self.stats = []

    def state_update(self):
        """Return the identifying/state fields wrapped under ``'state'``."""
        return {'state': {'name': self.name, 'group': self.group,
                          'pid': self.pid, 'state': self.state,
                          'statename': self.statename, 'start': self.start}}

    def __repr__(self):
        return ('<SupervisorProcess (name: {self.name}, group: {self.group}, '
                'pid: {self.pid}, start: {self.start}, state: {self.state}, '
                'statename: {self.statename}, stats: {self.stats})'
                .format(self=self))

    def __json__(self):
        """Return a JSON-serialisable dict of all public fields."""
        return {'name': self.name, 'group': self.group, 'pid': self.pid,
                'state': self.state, 'start': self.start,
                'stats': self.stats, 'statename': self.statename}
def run_subprocess(command, shell=False, doexec=True, monitor=False, tile_id=None):
    """Subprocess runner

    If the subprocess returns a non-zero exit code, STDERR is sent to the
    logger.

    Parameters
    ----------
    command : list of str
        Command to pass to Popen. Eg ['wget', '-q', '-r', dl_url]. When
        ``shell`` is true the items are joined with spaces into one string.
    shell : bool
        Passed to Popen
    doexec : bool
        Execute the subprocess or just log the command
    monitor : bool
        Log one snapshot of the child's full memory info and the system
        swap memory to the performance logger right after start-up
    tile_id
        Identifier included in the monitoring log messages

    Returns
    -------
    bool
        True when the command exited with code 0 or was not executed
        (``doexec`` false); False when it exited with a non-zero code
    """
    if not doexec:
        logger.debug("Not executing %s", command)
        return True

    if shell:
        command = " ".join(command)
    logger.debug(command)
    popen = Popen(command, shell=shell, stderr=PIPE, stdout=PIPE)

    if monitor:
        try:
            # Creating the handle is inside the try: a short-lived child may
            # already be gone, in which case Process() itself raises
            # NoSuchProcess.
            proc = Process(popen.pid)
            with proc.oneshot():
                logger_perf.debug("%s;%s;%s", tile_id,
                                  proc.memory_full_info(), swap_memory())
        except (NoSuchProcess, ZombieProcess):
            # A tuple is required here; ``A or B`` evaluates to just ``A``.
            logger.debug("%s is Zombie or NoSuchProcess", tile_id)
        except AccessDenied as e:
            logger_perf.exception(e)

    # communicate() drains both pipes and waits for the child to exit, so
    # returncode is set afterwards.
    _stdout, stderr = popen.communicate()
    if popen.returncode != 0:
        err = stderr.decode(locale.getpreferredencoding(do_setlocale=True))
        logger.debug("Process returned with non-zero exit code: %s",
                     popen.returncode)
        logger.error(err)
        return False
    return True
for n, v in zip(self.names, self.data): print(n, end="") print(": ", end="") print(str(v), end="") print(",") print("}\n") def pickle(self, filename): data_dict = dict(zip(self.names, self.data)) with open(filename, "wb") as handle: pickle.dump(data_dict, handle) if __name__ == '__main__': proc = Process(getpid()) mem0 = proc.memory_full_info().rss t0 = time() # parameters current_split_lvl = ArticleSplitter current_tokeniser = ToktokTokenizer language = "ALS" print("Processing ", language, " on splitting level ", current_split_lvl.__name__) #alphanum_tok = RegexpTokenizer(r'\w+') special_chars = get_special_char_regexp() special_char_remover = lambda s: special_chars.sub(' ', s) corpus_dir = "Wikiextractor/" + language wiki = WikiReader(corpus_dir,