def dump_objs():
    """Append a timestamped memory report to ``obj_log.txt``.

    The report contains a summary of the objects returned by
    ``muppy.get_objects()`` followed by the diff reported by the module-level
    ``TRACKER`` (created here on first use). If either section fails, the
    traceback is written to the log instead of being raised.
    """
    global TRACKER
    if TRACKER is None:
        TRACKER = tracker.SummaryTracker()

    with open("obj_log.txt", "a") as log_file:
        stamp = str(datetime.datetime.now())
        log_file.write("Memory at {}\n".format(stamp))

        # Section 1: summary of the current objects.
        try:
            current_objects = muppy.get_objects()
            overview = summary.summarize(current_objects)
            overview_rows = summary.format_(overview)
            log_file.write("Summary:\n")
            for row in overview_rows:
                log_file.write(" {}\n".format(row))
        except Exception:
            failure = traceback.format_exc()
            log_file.write("Error: \n")
            log_file.write(failure)

        # Section 2: what the tracker reports as changed.
        try:
            diff_rows = TRACKER.format_diff()
            log_file.write("Diff:\n")
            for row in diff_rows:
                log_file.write(" {}\n".format(row))
        except Exception:
            failure = traceback.format_exc()
            log_file.write("Error: \n")
            log_file.write(failure)

        log_file.write("\n")
def _capture_snapshot(self):
    # type: () -> None
    """
    Record one memory usage snapshot in ``self._profiling_data``.

    Two entries sharing the same integer timestamp are appended: an
    "aggregated" entry with the formatted summary (limit 50) of the filtered
    muppy objects, and a "diff" entry with the lines returned by
    ``self._tracker.format_diff()``.
    """
    now = int(time.time())

    # 1. Aggregate values over the (filtered) current objects
    tracked_objects = muppy.get_objects()
    tracked_objects = self._filter_muppy_objects(tracked_objects)
    aggregated_rows = summary.format_(summary.summarize(tracked_objects), limit=50)
    self._profiling_data.append(
        {
            "timestamp": now,
            "data": list(aggregated_rows),
            "type": "aggregated",
        }
    )

    # 2. Diff since the last capture
    diff_rows = self._tracker.format_diff()
    self._profiling_data.append(
        {
            "timestamp": now,
            "data": list(diff_rows),
            "type": "diff",
        }
    )
def memory_profiler(self):
    """Return the formatted object summary under the ``'Memory_profiler'`` key.

    The summary covers the objects returned by ``muppy.get_objects()`` and is
    formatted with ``LIMIT_OBJECTS_FOR_PROFILER`` as the row limit.
    """
    all_objects = muppy.get_objects()
    object_stats = summary.summarize(all_objects)
    report_lines = list(summary.format_(object_stats, LIMIT_OBJECTS_FOR_PROFILER))
    return {'Memory_profiler': report_lines}
def memory_summary():
    """Return a Pympler summary of the current objects as one string.

    The formatted summary rows are joined with newlines.
    """
    # Pympler is imported lazily so that it does not affect the process
    # unless memory_summary is actually called.
    from pympler import summary, muppy

    object_stats = summary.summarize(muppy.get_objects())
    return '\n'.join(summary.format_(object_stats))
def memory_summary():
    """Log, at INFO level, a formatted Pympler summary of the current objects."""
    from pympler import muppy, summary

    all_objects = muppy.get_objects()
    report_rows = summary.format_(summary.summarize(all_objects))
    logger.info("\n".join(report_rows))
def str_print_(cls, rows, limit=15, sort='size', order='descending'):
    """Render Pympler summary rows into a string instead of printing them.

    The result starts with a totals line (sum of ``row[1]`` as object count and
    sum of ``row[2]`` converted to MB), followed by the rows formatted with the
    requested ``sort`` and then again sorted by ``'#'``.
    """
    object_count = 0
    byte_count = 0
    for row in rows:
        object_count += row[1]
        byte_count += row[2]

    pieces = [
        "Total Objects: {:,} Total Mem: {:,.2f} MB\n".format(
            object_count, byte_count / (1024 * 1024)),
        "Sorted by mem size\n",
    ]
    pieces.extend(
        line + '\n'
        for line in summary.format_(rows, limit=limit, sort=sort, order=order)
    )
    pieces.append("Sorted by total amount\n")
    pieces.extend(
        line + '\n'
        for line in summary.format_(rows, limit=limit, sort='#', order=order)
    )
    return "".join(pieces)
def format_diff(self, ignore=()):
    """Return the formatted diff since the state of objects was last measured.

    keyword arguments
    ignore -- tuple of objects to ignore

    The returned list holds an "Added objects:" heading with the summary lines
    of ``diff['+']``, then a "Removed objects:" heading with those of
    ``diff['-']``.
    """
    report = []
    # The current frame is added to the ignored objects. It is passed inline
    # rather than stored in a local so the frame does not reference itself.
    changes = self.get_diff(ignore + (inspect.currentframe(),))

    report.append("Added objects:")
    report.extend(summary.format_(summary.summarize(changes['+'])))
    report.append("Removed objects:")
    report.extend(summary.format_(summary.summarize(changes['-'])))
    return report
def format_diff(self, ignore=()):
    """Yield the formatted diff since the state of objects was last measured.

    keyword arguments
    ignore -- iterable of objects to ignore; it is copied and never modified

    Yields an "Added objects:" heading followed by the summary lines of
    ``diff['+']``, then a "Removed objects:" heading followed by those of
    ``diff['-']``.
    """
    # Work on a private copy: appending to (and later emptying) the argument
    # would clobber the caller's list or a default shared between calls.
    ignore = list(ignore)
    # ignore this frame as well
    ignore.append(inspect.currentframe())
    try:
        diff = self.get_diff(ignore)
        yield "Added objects:"
        for line in summary.format_(summary.summarize(diff['+'])):
            yield line
        yield "Removed objects:"
        for line in summary.format_(summary.summarize(diff['-'])):
            yield line
    finally:
        # The list references this generator's own frame. Empty it even when
        # the generator is closed before exhaustion so the cycle is broken.
        del ignore[:]
async def log_object_summary(interval: float):
    """Log a 20-row Pympler object summary every ``interval`` seconds, forever.

    Each iteration sleeps first, then summarizes the objects returned by
    ``muppy.get_objects()`` and logs the formatted rows at INFO level.
    """
    from pympler import muppy, summary

    while True:
        await asyncio.sleep(interval)
        object_stats = summary.summarize(muppy.get_objects())
        report = '\n'.join(summary.format_(object_stats, limit=20))
        logging.info('top objects:\n%s', report)
def format_diff(self, ignore=()):
    """Yield the formatted diff since the state of objects was last measured.

    keyword arguments
    ignore -- iterable of objects to ignore; it is copied and never modified

    Yields an "Added objects:" heading followed by the summary lines of
    ``diff["+"]``, then a "Removed objects:" heading followed by those of
    ``diff["-"]``.
    """
    # Work on a private copy: appending to (and later emptying) the argument
    # would clobber the caller's list or a default shared between calls.
    ignore = list(ignore)
    # ignore this frame as well
    ignore.append(inspect.currentframe())
    try:
        diff = self.get_diff(ignore)
        yield "Added objects:"
        for line in summary.format_(summary.summarize(diff["+"])):
            yield line
        yield "Removed objects:"
        for line in summary.format_(summary.summarize(diff["-"])):
            yield line
    finally:
        # The list references this generator's own frame. Empty it even when
        # the generator is closed before exhaustion so the cycle is broken.
        del ignore[:]
def _get_stats():
    """Return the first 20 object-summary rows as a plain-text response tuple.

    The body is the formatted table, a blank line, then the same rows as JSON.
    The tuple is ``(body, 200, {'content-type': 'text/plain'})``.
    """
    from pympler import muppy, summary

    all_objects = muppy.get_objects()
    top_rows = summary.summarize(all_objects)[0:20]
    table = '\n'.join(summary.format_(top_rows))
    body = '%s\n\n%s' % (table, json.dumps(top_rows))
    return body, 200, {'content-type': 'text/plain'}
def _format_diff(self, diff, limit=15, sort='size', order='descending'):
    """Format summary rows ``diff`` as one string that starts with a newline.

    ``limit``, ``sort`` and ``order`` are forwarded to ``summary.format_``.
    """
    formatted_rows = summary.format_(diff, limit=limit, sort=sort, order=order)
    return "\n" + "\n".join(formatted_rows)
def memory_summary(self):
    """Log a Pympler object summary together with the calling function's name.

    The message is written through ``indigo.server.log``.
    """
    # Name of the function one frame up, so the log shows who asked.
    caller = sys._getframe(1).f_code.co_name

    # Pympler is imported lazily so that it does not affect the process
    # unless memory_summary is actually called.
    from pympler import summary, muppy

    object_stats = summary.summarize(muppy.get_objects())
    report = '\n'.join(summary.format_(object_stats))
    indigo.server.log('\n\nCALLED BY: ' + caller + '\n\n' + report)
def format_diff(self, summary1=None, summary2=None):
    """Compute the diff between two summaries and return its formatted lines.

    If no summary is provided, the diff from the last to the current summary
    is used. If summary1 is provided, the diff from summary1 to the current
    summary is used. If both summary1 and summary2 are provided, the diff
    between these two is used.
    """
    delta = self.diff(summary1=summary1, summary2=summary2)
    return summary.format_(delta)
def profile_expose_method(profiled_method_wrapper, accept, args, func, kw, exclude_from_memory_profiling):
    """
    Targeted to profile a specific method that wraps HTTP request processing endpoints into database context.

    :param profiled_method_wrapper: method wrapped around profiled call to be passed in to memory profiler
    :param accept: param specific to profiled call
    :param args: args of a function that is being wrapped by a profiled method
    :param func: function that is being wrapped by a profiled method
    :param kw: kwargs of a function that is being wrapped by a profiled method
    :param exclude_from_memory_profiling: when truthy, the wrapper is called directly without any profiling
    :return: output of a profiled method without modification
    """
    profiling_enabled = (
        not exclude_from_memory_profiling
        and get_memory_profile_logging_on()
        and check_memory_profile_package_wide_disable(func)
    )
    if not profiling_enabled:
        return profiled_method_wrapper(accept, args, func, kw)

    controller_class = args[0].__class__.__name__ if args else ''
    end_point_name = ".".join(
        part for part in (func.__module__, controller_class, func.__name__) if part != ''
    )
    is_pympler_on = _is_pympler_profiling_value_on(end_point_name)

    profile_output = {'output': {}}
    if is_pympler_on:
        # Summarize right away instead of keeping the object list around:
        # holding it would keep every pre-existing object alive during the call.
        all_objects_summary_before = summary.summarize(muppy.get_objects())

    memory_profile = memory_usage(
        (_profile_me, (profile_output, profiled_method_wrapper, func, accept, args, kw), {}),
        interval=0.1)
    # presumably _profile_me stores the wrapped call's result here -- confirm against _profile_me
    output = profile_output['output']

    if is_pympler_on:
        # Take a fresh snapshot; re-summarizing the pre-call object list would
        # never show objects that were created while the call was running.
        all_objects_summary_after = summary.summarize(muppy.get_objects())
        diff = summary.get_diff(all_objects_summary_before, all_objects_summary_after)
        diff_out = ''.join(s + '\n' for s in summary.format_(diff))
        thread_log.info("================ PYMPLER OUTPUT <{}> ==============\n{}".format(end_point_name, diff_out))

    try:
        message = json.dumps({'log_type': 'memory_profile',
                              'proc_id': os.getpid(),
                              'name': func.__name__,
                              'module': func.__module__,
                              'mem_profile': memory_profile,
                              'min': min(memory_profile),
                              'max': max(memory_profile),
                              'diff': max(memory_profile) - min(memory_profile),
                              'leaked': memory_profile[-1] - memory_profile[0],
                              'args': list(args[1:]),  # exclude self
                              'kwargs': kw})
        memory_log.info(message,
                        extra={'controller_module': func.__module__,
                               'controller_class': controller_class,
                               'endpoint': func.__name__})
    except Exception as e:
        # Logging is best-effort: a failure here must not lose the endpoint's output.
        thread_log.exception('Logger failed: {}'.format(e))

    return output
def memory_summary(self):
    """Write a Pympler object summary to the Indigo log, tagged with the caller.

    The caller is the name of the function one frame above this one.
    """
    # So we can reference the caller in the log message.
    caller = sys._getframe(1).f_code.co_name

    # Only import Pympler when it is needed, so it does not affect the
    # process if memory_summary is never called.
    from pympler import summary, muppy

    formatted_rows = summary.format_(summary.summarize(muppy.get_objects()))
    message = '\n\nCALLED BY: ' + caller + '\n\n' + '\n'.join(formatted_rows)
    indigo.server.log(message)
def dump(self):
    """Write memory diagnostics to the debug log.

    Logs the 15 largest rows of the current object summary, then (when
    ``self._aliceTracker`` is set) takes a snapshot on it and prints its
    stats summary, then (when ``self._tracker`` is set) logs the diff that
    tracker reports.
    """
    allObjects = muppy.get_objects()
    summ = summary.summarize(allObjects)

    self.logDebug('----- Objects summary -----')
    for line in summary.format_(summ, limit=15, sort='size', order='descending'):
        self.logDebug(line)

    if self._aliceTracker:
        self.logDebug('----- ProjectAliceObject tracker -----')
        self._aliceTracker.create_snapshot()
        # print_summary writes its own output rather than going through logDebug
        self._aliceTracker.stats.print_summary()

    if self._tracker:
        self.logDebug('----- Evolution over time -----')
        for line in summary.format_(self._tracker.diff()):
            self.logDebug(line)

    # NOTE(review): assumed to run on every call, so the first dump creates
    # the tracker -- confirm this is not meant to sit inside the `if` above.
    self._tracker = tracker.SummaryTracker()
def printmemory(sig, currentframe):
    """Write a Pympler object summary to a file; do nothing without pympler.

    The output path comes from ``mempathformat`` filled with the current time
    and process id. The summary is limited to 50 rows and sorted by ``"#"``.
    Neither ``sig`` nor ``currentframe`` is used.
    """
    try:
        # pyre-fixme[21]: Could not find `pympler`.
        from pympler import muppy, summary

        muppy.get_objects
    except ImportError:
        return

    all_objects = muppy.get_objects()
    object_stats = summary.summarize(all_objects)
    target = mempathformat % {"time": time.time(), "pid": os.getpid()}
    with open(target, "w") as out:
        report = "\n".join(summary.format_(object_stats, limit=50, sort="#"))
        out.write(report)
def loop(interval):
    """Log the main thread's stack and an object summary every ``interval`` seconds.

    Runs forever; each iteration sleeps first and then logs at INFO level.
    """
    mainthread = main_thread()
    while True:
        time.sleep(interval)
        stacktrace = "".join(format_thread(mainthread))
        object_stats = summary.summarize(muppy.get_objects())
        memtrace = "\n".join(summary.format_(object_stats))
        logger.info(
            "Watchdog traceback:\n"
            f"{stacktrace}\n"
            f"{memtrace}"
        )
def loop(interval):
    """Log the main thread's stack and an object summary every ``interval`` seconds.

    Runs forever. An exception while collecting or logging is itself logged as
    an error and the loop carries on with the next iteration.
    """
    mainthread = main_thread()
    while True:
        time.sleep(interval)
        try:
            stacktrace = "".join(format_thread(mainthread))
            object_stats = summary.summarize(muppy.get_objects())
            memtrace = "\n".join(summary.format_(object_stats))
            logger.info(
                "Watchdog traceback:\n"
                f"{stacktrace}\n"
                f"{memtrace}"
            )
        except Exception as e:
            logger.error("Error in watchdog", exc_info=e)
def log_summary(self, msg=None):
    """
    Generate a summary of all memory used and write it to the debug log.

    When ``self.on`` is falsy nothing happens and ``None`` is returned.
    Otherwise the 15 largest summary rows are logged (preceded by ``msg`` when
    given) and the formatted summary string is returned.
    """
    if not self.on:
        return None

    all_objs = muppy.get_objects()
    all_summ = summary.summarize(all_objs)
    formatted_summ = "\n".join(
        summary.format_(all_summ, limit=15, sort='size', order='descending')
    )

    if msg is None:
        heading = 'Full Summary:\n'
    else:
        heading = 'Full Summary:\n' + msg + '\n'
    self.logger.debug(heading + formatted_summ)
    return formatted_summ
def memory_profiler(self):
    """Build the memory profile payload.

    Returns a dict whose ``'Memory_profiler'`` entry is the list of formatted
    summary lines for the objects returned by ``muppy.get_objects()``, with
    ``LIMIT_OBJECTS_FOR_PROFILER`` passed as the row limit.
    """
    all_objects = muppy.get_objects()
    stats = summary.summarize(all_objects)
    formatted = summary.format_(stats, LIMIT_OBJECTS_FOR_PROFILER)
    return {'Memory_profiler': list(formatted)}
def yield_heapdump(heap_summary):
    """Yield each formatted line of ``heap_summary`` with a trailing newline."""
    yield from (f'{row}\n' for row in summary.format_(heap_summary))
def work(self, use_defaults=True, choose_randomly=True, wait=True, verbose=False):
    """Get unfinished Datasets from the database and work on them.

    Args:
        use_defaults (bool):
            If ``True``, take the dataset returned by ``self.db.select_dataset()``,
            call ``worker.run_default()`` on it once, mark it complete and delete
            its training data. Otherwise pick a dataset from
            ``self.db.get_datasets()`` and call ``worker.run_algorithm()`` for as
            long as the dataset is RUNNING. Optional. Defaults to ``True``.
        choose_randomly (bool):
            Only used when ``use_defaults`` is ``False``. If ``True``, pick one of
            the available datasets at random. Otherwise, pick the one with the
            lowest ID. Optional. Defaults to ``True``.
        wait (bool):
            If ``True``, wait for more datasets to be inserted into the Database
            once all have been processed. Otherwise, exit the worker loop when
            they run out. Optional. Defaults to ``True``.
        verbose (bool):
            Whether to be verbose about the process (enables the progress bar).
            Optional. Defaults to ``False``.
    """
    signal.signal(signal.SIGUSR1, lambda s, frame: self._user_abort())

    # ##########################################################################
    # # Main Loop ############################################################
    # ##########################################################################

    failure_counter = 0

    # Count number of running workers
    pids = set()
    core = None
    if self.affinity:
        for p in psutil.process_iter():
            if re.match('.*python\\d?', p.name()) and 'worker' in p.cmdline() and \
                    any(arg.endswith('cli.py') for arg in p.cmdline()):
                if p.parent() is None or p.parent().pid not in pids:
                    pids.add(p.pid)

        # The parentheses matter: without them `%` binds tighter than `-`, and
        # the core index is never wrapped around the number of CPUs.
        core = {(len(pids) - 1) % os.cpu_count()}
        LOGGER.info('Setting affinity to {}'.format(core))

    while True:
        if self._abort:
            LOGGER.info("Stopping processing due to user request")
            break

        ds = None
        if use_defaults:
            ds = self.db.select_dataset()
        else:
            # Get all pending and running datasets, or all pending/running
            # datasets from the list we were given
            datasets = self.db.get_datasets()
            if len(datasets) > 0:
                # Either choose a dataset randomly, or take the dataset with
                # the lowest ID
                if choose_randomly:
                    ds = random.choice(datasets)
                else:
                    ds = sorted(datasets, key=attrgetter('id'))[0]

                del datasets
                try:
                    self.db.mark_dataset_running(ds.id)
                except UserWarning:
                    LOGGER.warning('Skipping completed dataset: {}'.format(ds.id))

        if not ds:
            if wait:
                LOGGER.debug(
                    'No datasets found. Sleeping %d seconds and trying again.',
                    self._LOOP_WAIT)
                time.sleep(self._LOOP_WAIT)
                continue
            else:
                LOGGER.info('No datasets found. Exiting.')
                break

        LOGGER.info('Computing on dataset {}'.format(ds.id))
        # Bound before the try so the `del worker` in `finally` cannot fail.
        worker = None
        try:
            # Progress bar
            pbar = tqdm(total=ds.budget, ascii=True, initial=ds.processed,
                        disable=not verbose)

            # Create the worker
            worker = Worker(
                self.db,
                ds,
                self,
                timeout=self.timeout,
                s3_config=self.s3_config,
                s3_bucket=self.s3_bucket,
                complete_pipelines=self.complete_pipelines,
                complete_pipeline_samples=self.complete_pipeline_samples,
                max_pipeline_depth=self.max_pipeline_depth,
                affinity=core,
                verbose_metrics=self.verbose_metrics)

            # Call run_algorithm as long as the chosen dataset is marked as RUNNING
            while ds.status == RunStatus.RUNNING:
                if use_defaults:
                    worker.run_default()
                    self.db.mark_dataset_complete(ds.id)
                    delete_data(ds.train_path)
                    break

                success = worker.run_algorithm()
                ds = self.db.get_dataset(ds.id)
                if verbose and ds.processed > pbar.last_print_n:
                    pbar.update(ds.processed - pbar.last_print_n)

                # Safety valve to abort execution if something is broken
                if success is False:
                    LOGGER.error(
                        'Something went wrong. Sleeping {} seconds.'.format(
                            self._LOOP_WAIT))
                    time.sleep(self._LOOP_WAIT)
                    failure_counter += 1
                    if failure_counter > 10:
                        LOGGER.fatal(
                            'Received 10 consecutive unexpected exceptions. Aborting evaluation.'
                        )
                        # We occasionally encounter OSError: [Errno 12] Cannot
                        # allocate memory. To debug the memory leak the current
                        # heap allocation is logged
                        all_objects = muppy.get_objects()
                        LOGGER.fatal('Heap Dump:\n' + '\n'.join(
                            summary.format_(summary.summarize(all_objects))))
                        buffer = StringIO()
                        cb = refbrowser.StreamBrowser(
                            self,
                            maxdepth=4,
                            str_func=lambda o: str(type(o)),
                            stream=buffer)
                        cb.print_tree()
                        LOGGER.fatal('References:\n' + buffer.getvalue())
                        sys.exit(1)
                else:
                    # Any run that did not fail resets the consecutive-failure count.
                    failure_counter = 0

            pbar.close()
        except AlgorithmError:
            # The exception has already been handled; just wait a sec so we
            # don't go out of control reporting errors
            LOGGER.error(
                'Something went wrong. Sleeping {} seconds.'.format(
                    self._LOOP_WAIT))
            time.sleep(self._LOOP_WAIT)
        finally:
            del worker
def mem():
    """Return the formatted object summary as text ending in a newline."""
    objs = muppy.get_objects()
    report = "\n".join(summary.format_(summary.summarize(objs)))
    return "{}\n".format(report)
def memory_summary():
    """Return the formatted summary of ``muppy.get_objects()`` as one string.

    The summary rows are joined with newlines.
    """
    formatted_rows = summary.format_(summary.summarize(muppy.get_objects()))
    return '\n'.join(formatted_rows)
def profile_expose_method(profiled_method_wrapper, accept, args, func, kw,
                          exclude_from_memory_profiling):
    """
    Targeted to profile a specific method that wraps HTTP request processing endpoints into database context.

    :param profiled_method_wrapper: method wrapped around profiled call to be passed in to memory profiler
    :param accept: param specific to profiled call
    :param args: args of a function that is being wrapped by a profiled method
    :param func: function that is being wrapped by a profiled method
    :param kw: kwargs of a function that is being wrapped by a profiled method
    :param exclude_from_memory_profiling: when truthy, the wrapper is called directly without any profiling
    :return: output of a profiled method without modification
    """
    if not exclude_from_memory_profiling and get_memory_profile_logging_on() and \
            check_memory_profile_package_wide_disable(func):
        controller_class = args[0].__class__.__name__ if args else ''
        end_point_name_parts = [
            s for s in [func.__module__, controller_class, func.__name__]
            if s != ''
        ]
        end_point_name = ".".join(end_point_name_parts)
        is_pympler_on = _is_pympler_profiling_value_on(end_point_name)
        profile_output = {'output': {}}
        if is_pympler_on:
            # Summarize right away instead of keeping the object list around:
            # holding it would keep every pre-existing object alive during the call.
            all_objects_summary_before = summary.summarize(muppy.get_objects())
        memory_profile = memory_usage(
            (_profile_me, (profile_output, profiled_method_wrapper, func,
                           accept, args, kw), {}),
            interval=0.1)
        # presumably _profile_me stores the wrapped call's result here -- confirm against _profile_me
        output = profile_output['output']
        if is_pympler_on:
            # Take a fresh snapshot; re-summarizing the pre-call object list
            # would never show objects created while the call was running.
            all_objects_summary_after = summary.summarize(muppy.get_objects())
            diff = summary.get_diff(all_objects_summary_before,
                                    all_objects_summary_after)
            diff_out = ''.join(s + '\n' for s in summary.format_(diff))
            thread_log.info(
                "================ PYMPLER OUTPUT <{}> ==============\n{}".
                format(end_point_name, diff_out))
        try:
            message = json.dumps({
                'log_type': 'memory_profile',
                'proc_id': os.getpid(),
                'name': func.__name__,
                'module': func.__module__,
                'mem_profile': memory_profile,
                'min': min(memory_profile),
                'max': max(memory_profile),
                'diff': max(memory_profile) - min(memory_profile),
                'leaked': memory_profile[-1] - memory_profile[0],
                'args': list(args[1:]),  # exclude self
                'kwargs': kw
            })
            memory_log.info(message,
                            extra={
                                'controller_module': func.__module__,
                                'controller_class': controller_class,
                                'endpoint': func.__name__
                            })
        except Exception as e:
            # Logging is best-effort: a failure here must not lose the endpoint's output.
            thread_log.exception('Logger failed: {}'.format(e))
    else:
        output = profiled_method_wrapper(accept, args, func, kw)
    return output
def mem():
    """Summarize the objects from ``muppy.get_objects()`` as newline-terminated text."""
    objs = muppy.get_objects()
    formatted_rows = summary.format_(summary.summarize(objs))
    text = '\n'.join(formatted_rows)
    return text + '\n'