Beispiel #1
0
def dump_all_referenced(outf, obj, is_pending=False):
    """Recursively dump everything that is referenced from obj.

    Every distinct object (by ``id``) reachable through ``get_referents`` is
    passed exactly once to ``_scanner.dump_object_info``.

    :param outf: Either an already-open file-like object, or a string path.
        When a path is given the file is opened in ``'wb'`` mode and closed
        again before this function returns (also on error). A file object
        supplied by the caller is never closed here.
    :param obj: The root object to start from. If is_pending is True this
        is instead a list of root objects.
    :param is_pending: When True, ``obj`` is used directly as the work
        stack. NOTE: that list is modified in place while walking, and the
        list object itself is not dumped.
    """
    opened_here = isinstance(outf, six.string_types)
    if opened_here:
        outf = open(outf, 'wb')
    try:
        if is_pending:
            pending = obj
        else:
            pending = [obj]
        # 'pending' is a stack; last_offset is the index of its top entry.
        # Slots above last_offset are stale and get overwritten before the
        # list is grown, so the list is only extended when really needed.
        last_offset = len(pending) - 1
        # TODO: Instead of using an IDSet, we could use a BloomFilter. It
        #       would mean some objects may not get dumped (blooms say "you
        #       definitely are not present", but only "you might already be
        #       present"; collisions cause false positives.)
        #       However, you can get by with 8-10bits for a 1% FPR, rather
        #       than using 32/64-bit pointers + overhead for avoiding hash
        #       collisions. So on 64-bit we drop from 16bytes/object to 1...
        seen = _intset.IDSet()
        if is_pending:
            # Pre-mark the work list so that it never gets dumped itself if
            # something reachable happens to refer back to it.
            seen.add(id(pending))
        while last_offset >= 0:
            current = pending[last_offset]
            last_offset -= 1
            id_current = id(current)
            if id_current in seen:
                continue
            seen.add(id_current)
            # We do the recursion here, so tell dump_object_info to not
            # recurse
            _scanner.dump_object_info(outf, current, recurse_depth=0)
            for ref in get_referents(current):
                if id(ref) not in seen:
                    last_offset += 1
                    if len(pending) > last_offset:
                        pending[last_offset] = ref
                    else:
                        pending.append(ref)
    finally:
        # Only close what we opened; a caller-supplied file stays open.
        if opened_here:
            outf.close()
Beispiel #2
0
def get_recursive_size(obj):
    """Measure obj together with everything reachable from it.

    The object graph is walked via ``get_referents``; each distinct object
    (identified by ``id``) is visited once and its ``_scanner.size_of``
    value is added to a running total.

    :param obj: The object to start walking from.
    :return: A tuple ``(num_objects, total_size)`` where num_objects is the
        count of distinct objects visited (including obj itself) and
        total_size is the sum of their individual sizes.
    """
    visited = _intset.IDSet()
    measure = _scanner.size_of
    accumulated = 0
    # Depth-first walk using the list as a LIFO stack.
    stack = [obj]
    while stack:
        current = stack.pop()
        current_id = id(current)
        if current_id in visited:
            continue
        visited.add(current_id)
        accumulated += measure(current)
        # Queue only the children we have not handled yet; duplicates that
        # slip in are filtered by the check at the top of the loop.
        stack.extend(child for child in get_referents(current)
                     if id(child) not in visited)
    return len(visited), accumulated
Beispiel #3
0
def get_recursive_items(obj):
    """Collect obj and every object reachable from it, without duplicates.

    The object graph is walked via ``get_referents``; objects are
    de-duplicated by ``id``.

    :param obj: The object to start walking from.
    :return: A list of the distinct objects, in the order they were visited
        (obj itself comes first).
    """
    collected = []
    visited = _intset.IDSet()
    # Depth-first walk using the list as a LIFO stack.
    stack = [obj]
    while stack:
        current = stack.pop()
        current_id = id(current)
        if current_id in visited:
            continue
        visited.add(current_id)
        collected.append(current)
        # Queue only the children we have not handled yet; duplicates that
        # slip in are filtered by the check at the top of the loop.
        stack.extend(child for child in get_referents(current)
                     if id(child) not in visited)
    return collected
Beispiel #4
0
def remove_expensive_references(source, total_objs=0, show_progress=False):
    """Filter out references that are mere housekeeping links.

    module.__dict__ tends to reference lots of other modules, which in turn
    brings in the global reference cycle. Going further
    function.__globals__ references module.__dict__, so it *too* ends up in
    the global cycle. Generally these references aren't interesting, simply
    because they end up referring to *everything*.

    We filter out any reference to modules, frames, types, function globals
    pointers & LRU sideways references. Every removed set of references is
    replaced by a single reference to address 0, the '<ex-reference>'
    placeholder object.

    This is a generator: nothing happens until it is iterated. The objects
    produced by ``source`` are modified in place (their ``children`` is
    reassigned) before being yielded.

    :param source: A callable that returns an iterator of MemObjects. This
        will be called twice.
    :param total_objs: The total objects to be filtered, if known. If
        show_progress is False or the count of objects is unknown, 0. When
        it is 0 and show_progress is True, the count observed during the
        first pass is used for the progress output instead.
    :param show_progress: If True, write progress lines to sys.stderr.
    :return: An iterator of (changed, MemObject) objects with expensive
        references removed. If no object with address 0 was seen, the first
        item yielded is (True, <placeholder object at address 0>).
    """
    # First pass, find objects we don't want to reference any more
    noref_objs = _intset.IDSet()
    lru_objs = _intset.IDSet()
    total_steps = total_objs * 2
    seen_zero = False
    # Pre-set so that 'idx + 1' below is a valid object count (0) even when
    # source() yields nothing; otherwise idx would be unbound.
    idx = -1
    for idx, obj in enumerate(source()):
        # 'module's have a single __dict__, which tends to refer to other
        # modules. As you start tracking into that, you end up getting into
        # reference cycles, etc, which generally ends up referencing every
        # object in memory.
        # 'frame' also tends to be self referential, and a single frame
        # ends up referencing the entire current state
        # 'type' generally is self referential through several attributes.
        # __bases__ means we recurse all the way up to object, and object
        # has __subclasses__, which means we recurse down into all types.
        # In general, not helpful for debugging memory consumption
        if show_progress and (idx & 0x1ff) == 0:
            # Only report every 512 objects
            sys.stderr.write('finding expensive refs... %8d / %8d    \r'
                             % (idx, total_steps))
        if obj.type_str in ('module', 'frame', 'type'):
            noref_objs.add(obj.address)
        if obj.type_str == '_LRUNode':
            lru_objs.add(obj.address)
        if obj.address == 0:
            seen_zero = True
    # Second pass, any object which refers to something in noref_objs will
    # have that reference removed, and replaced with the null_memobj
    num_expensive = len(noref_objs)
    null_memobj = _loader._MemObjectProxy_from_args(0, '<ex-reference>', 0, [])
    if not seen_zero:
        # Make sure the address-0 target we are about to reference exists
        yield (True, null_memobj)
    if show_progress and total_objs == 0:
        # idx is the last index seen, so the number of objects is idx + 1
        total_objs = idx + 1
        total_steps = total_objs * 2
    for idx, obj in enumerate(source()):
        if show_progress and (idx & 0x1ff) == 0:
            sys.stderr.write('removing %d expensive refs... %8d / %8d   \r'
                             % (num_expensive, idx + total_objs,
                                total_steps))
        if obj.type_str == 'function':
            # Functions have a reference to 'globals' which is not very
            # helpful for having a clear understanding of what is going on
            # especially since the function itself is in its own globals
            # XXX: This is probably not a guaranteed order, but currently
            #       func_traverse returns:
            #   func_code, func_globals, func_module, func_defaults,
            #   func_doc, func_name, func_dict, func_closure
            # We want to remove the reference to globals and module
            refs = list(obj.children)
            obj.children = refs[:1] + refs[3:] + [0]
            yield (True, obj)
            continue
        if obj.type_str == '_LRUNode':
            # We remove the 'sideways' references
            obj.children = [ref for ref in obj.children
                            if ref not in lru_objs]
            yield (True, obj)
            continue
        if not any(ref in noref_objs for ref in obj.children):
            # No bad references, keep going
            yield (False, obj)
            continue
        new_ref_list = [ref for ref in obj.children
                        if ref not in noref_objs]
        # Stand-in for all of the references that were just dropped
        new_ref_list.append(0)
        obj.children = new_ref_list
        yield (True, obj)
    if show_progress:
        sys.stderr.write('removed %d expensive refs from %d objs%s\n'
                         % (num_expensive, total_objs, ' '*20))