def dump_all_referenced(outf, obj, is_pending=False):
    """Recursively dump everything that is referenced from obj.

    :param outf: Either an open file-like object, or a path string. A path
        is opened in binary write mode and closed again before this function
        returns (even on error). A file object passed in by the caller is
        left open.
    :param obj: The root object to start from. If is_pending is True, this
        is instead a list of objects to start from.
    :param is_pending: If True, obj is treated as the list of pending
        objects. That list is used directly as the work stack, so it is
        modified in place, and the list itself is never dumped.
    """
    opened_here = isinstance(outf, six.string_types)
    if opened_here:
        outf = open(outf, 'wb')
    try:
        if is_pending:
            pending = obj
        else:
            pending = [obj]
        # pending is used as a stack: last_offset indexes the current top.
        # Slots past last_offset are stale and get overwritten on push.
        last_offset = len(pending) - 1
        # TODO: Instead of using an IDSet, we could use a BloomFilter. It
        #       would mean some objects may not get dumped (blooms say "yes
        #       you definitely are not present", but only "you might already
        #       be present", collisions cause false positives.)
        #       However, you can get by with 8-10bits for a 1% FPR, rather
        #       than using 32/64-bit pointers + overhead for avoiding hash
        #       collisions. So on 64-bit we drop from 16bytes/object to 1...
        seen = _intset.IDSet()
        if is_pending:
            # Mark the work list itself as seen so that it is skipped if
            # something we walk happens to reference it.
            seen.add(id(pending))
        while last_offset >= 0:
            current = pending[last_offset]
            last_offset -= 1
            id_current = id(current)
            if id_current in seen:
                continue
            seen.add(id_current)
            # We will recurse here, so tell dump_object_info to not recurse
            _scanner.dump_object_info(outf, current, recurse_depth=0)
            for ref in get_referents(current):
                if id(ref) not in seen:
                    last_offset += 1
                    if len(pending) > last_offset:
                        # Reuse a stale slot rather than growing the list
                        pending[last_offset] = ref
                    else:
                        pending.append(ref)
    finally:
        # Only close what we opened; the caller owns any file it passed in.
        if opened_here:
            outf.close()
def get_recursive_size(obj):
    """Measure obj together with everything reachable from it.

    Walks the reference graph starting at obj (following get_referents),
    counting each distinct object once and summing the size reported by
    _scanner.size_of for each of them.

    :param obj: The root object to start walking from.
    :return: A tuple of (number of distinct objects, total size).
    """
    visited = _intset.IDSet()
    measure = _scanner.size_of
    byte_total = 0
    stack = [obj]
    while stack:
        current = stack.pop()
        ident = id(current)
        if ident in visited:
            # Reached again through another path; already counted.
            continue
        visited.add(ident)
        byte_total += measure(current)
        # Queue only children we have not handled yet. Duplicates may still
        # be queued; they are filtered by the check above when popped.
        stack.extend(ref for ref in get_referents(current)
                     if id(ref) not in visited)
    return len(visited), byte_total
def get_recursive_items(obj):
    """Collect every distinct object reachable from obj, including obj.

    The reference graph is walked depth-first via get_referents. Objects are
    de-duplicated by identity, and are returned in the order they were first
    visited.

    :param obj: The root object to start walking from.
    :return: A list of the unique objects found.
    """
    found = []
    visited = _intset.IDSet()
    stack = [obj]
    while stack:
        current = stack.pop()
        ident = id(current)
        if ident in visited:
            # Already collected via another reference path.
            continue
        visited.add(ident)
        found.append(current)
        for ref in get_referents(current):
            if id(ref) in visited:
                continue
            stack.append(ref)
    return found
def remove_expensive_references(source, total_objs=0, show_progress=False):
    """Filter out references that are mere housekeeping links.

    module.__dict__ tends to reference lots of other modules, which in turn
    brings in the global reference cycle. Going further
    function.__globals__ references module.__dict__, so it *too* ends up in
    the global cycle. Generally these references aren't interesting, simply
    because they end up referring to *everything*.

    We filter out any reference to modules, frames, types, function globals
    pointers & LRU sideways references.

    This is a generator: nothing is read from source until iteration starts.
    Objects whose references are filtered have their ``children`` attribute
    replaced in place.

    :param source: A callable that returns an iterator of MemObjects. This
        will be called twice.
    :param total_objs: The total objects to be filtered, if known. If
        show_progress is False or the count of objects is unknown, 0. When 0
        and show_progress is True, the count found in the first pass is used.
    :param show_progress: If True, write progress messages to sys.stderr.
    :return: An iterator of (changed, MemObject) objects with expensive
        references removed.
    """
    # First pass, find objects we don't want to reference any more
    noref_objs = _intset.IDSet()
    lru_objs = _intset.IDSet()
    total_steps = total_objs * 2
    seen_zero = False
    # Counted explicitly so that an empty source is handled, and so that we
    # have the real number of objects rather than the last index.
    num_seen = 0
    for idx, obj in enumerate(source()):
        num_seen = idx + 1
        # 'module's have a single __dict__, which tends to refer to other
        # modules. As you start tracking into that, you end up getting into
        # reference cycles, etc, which generally ends up referencing every
        # object in memory.
        # 'frame' also tends to be self referential, and a single frame
        # ends up referencing the entire current state
        # 'type' generally is self referential through several attributes.
        # __bases__ means we recurse all the way up to object, and object
        # has __subclasses__, which means we recurse down into all types.
        # In general, not helpful for debugging memory consumption
        if show_progress and idx & 0x1ff == 0:
            sys.stderr.write('finding expensive refs... %8d / %8d \r'
                             % (idx, total_steps))
        if obj.type_str in ('module', 'frame', 'type'):
            noref_objs.add(obj.address)
        if obj.type_str == '_LRUNode':
            lru_objs.add(obj.address)
        if obj.address == 0:
            seen_zero = True
    # Second pass, any object which refers to something in noref_objs will
    # have that reference removed, and replaced with the null_memobj
    num_expensive = len(noref_objs)
    null_memobj = _loader._MemObjectProxy_from_args(0, '<ex-reference>', 0, [])
    if not seen_zero:
        # Nothing in the source lives at address 0, so supply the
        # placeholder that the rewritten reference lists point at.
        yield (True, null_memobj)
    if show_progress and total_objs == 0:
        total_objs = num_seen
        total_steps = total_objs * 2
    for idx, obj in enumerate(source()):
        if show_progress and idx & 0x1ff == 0:
            sys.stderr.write('removing %d expensive refs... %8d / %8d \r'
                             % (num_expensive, idx + total_objs, total_steps))
        if obj.type_str == 'function':
            # Functions have a reference to 'globals' which is not very
            # helpful for having a clear understanding of what is going on
            # especially since the function itself is in its own globals
            # XXX: This is probably not a guaranteed order, but currently
            #      func_traverse returns:
            #      func_code, func_globals, func_module, func_defaults,
            #      func_doc, func_name, func_dict, func_closure
            #      We want to remove the reference to globals and module
            refs = list(obj.children)
            obj.children = refs[:1] + refs[3:] + [0]
            yield (True, obj)
            continue
        elif obj.type_str == '_LRUNode':
            # We remove the 'sideways' references
            obj.children = [ref for ref in obj.children
                            if ref not in lru_objs]
            yield (True, obj)
            continue
        if not any(ref in noref_objs for ref in obj.children):
            # No bad references, keep going
            yield (False, obj)
            continue
        new_ref_list = [ref for ref in obj.children
                        if ref not in noref_objs]
        # Stand-in for the references that were dropped
        new_ref_list.append(0)
        obj.children = new_ref_list
        yield (True, obj)
    if show_progress:
        sys.stderr.write('removed %d expensive refs from %d objs%s\n'
                         % (num_expensive, total_objs, ' '*20))