Ejemplo n.º 1
0
def remove_expensive_references(source, total_objs=0, show_progress=False):
    """Filter out references that are mere housekeeping links.

    module.__dict__ tends to reference lots of other modules, which in turn
    brings in the global reference cycle. Going further
    function.__globals__ references module.__dict__, so it *too* ends up in
    the global cycle. Generally these references aren't interesting, simply
    because they end up referring to *everything*.

    We filter out any reference to modules, frames, types, function globals
    pointers & LRU sideways references.

    This is a generator: nothing is read from ``source`` until the result
    is iterated. Objects whose references are filtered have their
    ``children`` attribute reassigned in place before being yielded.

    :param source: A callable that returns an iterator of MemObjects. This
        will be called twice.
    :param total_objs: The total objects to be filtered, if known. If
        show_progress is False or the count of objects is unknown, 0.
        When it is 0 and show_progress is True, the count is taken from
        the first pass over ``source``.
    :param show_progress: If True, write progress lines to sys.stderr.
    :return: An iterator of (changed, MemObject) objects with expensive
        references removed. If no object with address 0 was seen in the
        first pass, the first item yielded is a placeholder object at
        address 0 ('<ex-reference>') that removed references point at.
    """
    # First pass, find objects we don't want to reference any more
    noref_objs = _intset.IDSet()
    lru_objs = _intset.IDSet()
    total_steps = total_objs * 2
    seen_zero = False
    # Pre-bind the loop variable so the object count computed after the
    # loop is well defined even when source() yields nothing.
    idx = -1
    for idx, obj in enumerate(source()):
        # 'module's have a single __dict__, which tends to refer to other
        # modules. As you start tracking into that, you end up getting into
        # reference cycles, etc, which generally ends up referencing every
        # object in memory.
        # 'frame' also tends to be self referential, and a single frame
        # ends up referencing the entire current state
        # 'type' generally is self referential through several attributes.
        # __bases__ means we recurse all the way up to object, and object
        # has __subclasses__, which means we recurse down into all types.
        # In general, not helpful for debugging memory consumption
        if show_progress and idx & 0x1ff == 0:
            sys.stderr.write('finding expensive refs... %8d / %8d    \r'
                             % (idx, total_steps))
        if obj.type_str in ('module', 'frame', 'type'):
            noref_objs.add(obj.address)
        if obj.type_str == '_LRUNode':
            lru_objs.add(obj.address)
        if obj.address == 0:
            seen_zero = True
    # Second pass, any object which refers to something in noref_objs will
    # have that reference removed, and replaced with the null_memobj
    num_expensive = len(noref_objs)
    null_memobj = _loader._MemObjectProxy_from_args(0, '<ex-reference>', 0, [])
    if not seen_zero:
        yield (True, null_memobj)
    if show_progress and total_objs == 0:
        # idx is the last 0-based index seen, so the number of objects is
        # idx + 1 (and 0 when the source was empty).
        total_objs = idx + 1
        total_steps = total_objs * 2
    for idx, obj in enumerate(source()):
        if show_progress and idx & 0x1ff == 0:
            # The second pass continues the progress count where the first
            # pass left off, hence the total_objs offset.
            sys.stderr.write('removing %d expensive refs... %8d / %8d   \r'
                             % (num_expensive, idx + total_objs,
                                total_steps))
        if obj.type_str == 'function':
            # Functions have a reference to 'globals' which is not very
            # helpful for having a clear understanding of what is going on
            # especially since the function itself is in its own globals
            # XXX: This is probably not a guaranteed order, but currently
            #       func_traverse returns:
            #   func_code, func_globals, func_module, func_defaults,
            #   func_doc, func_name, func_dict, func_closure
            # We want to remove the reference to globals and module
            refs = list(obj.children)
            # Drop positions 1 and 2, and point at address 0 instead.
            obj.children = refs[:1] + refs[3:] + [0]
            yield (True, obj)
            continue
        elif obj.type_str == '_LRUNode':
            # We remove the 'sideways' references
            obj.children = [ref for ref in obj.children
                                 if ref not in lru_objs]
            yield (True, obj)
            continue
        # Cheap scan first: most objects have no expensive references, and
        # for those we avoid building a new list at all.
        for ref in obj.children:
            if ref in noref_objs:
                break
        else:
            # No bad references, keep going
            yield (False, obj)
            continue
        new_ref_list = [ref for ref in obj.children
                             if ref not in noref_objs]
        # A single reference to address 0 stands in for everything removed.
        new_ref_list.append(0)
        obj.children = new_ref_list
        yield (True, obj)
    if show_progress:
        sys.stderr.write('removed %d expensive refs from %d objs%s\n'
                         % (num_expensive, total_objs, ' '*20))
Ejemplo n.º 2
0
def remove_expensive_references(source, total_objs=0, show_progress=False):
    """Filter out references that are mere housekeeping links.

    module.__dict__ tends to reference lots of other modules, which in turn
    brings in the global reference cycle. Going further
    function.__globals__ references module.__dict__, so it *too* ends up in
    the global cycle. Generally these references aren't interesting, simply
    because they end up referring to *everything*.

    We filter out any reference to modules, frames, types, function globals
    pointers & LRU sideways references.

    This is a generator: nothing is read from ``source`` until the result
    is iterated. Objects whose references are filtered have their
    ``children`` attribute reassigned in place before being yielded.

    :param source: A callable that returns an iterator of MemObjects. This
        will be called twice.
    :param total_objs: The total objects to be filtered, if known. If
        show_progress is False or the count of objects is unknown, 0.
        When it is 0 and show_progress is True, the count is taken from
        the first pass over ``source``.
    :param show_progress: If True, write progress lines to sys.stderr.
    :return: An iterator of (changed, MemObject) objects with expensive
        references removed. If no object with address 0 was seen in the
        first pass, the first item yielded is a placeholder object at
        address 0 ('<ex-reference>') that removed references point at.
    """
    # First pass, find objects we don't want to reference any more
    noref_objs = _intset.IDSet()
    lru_objs = _intset.IDSet()
    total_steps = total_objs * 2
    seen_zero = False
    # Pre-bind the loop variable so the object count computed after the
    # loop is well defined even when source() yields nothing.
    idx = -1
    for idx, obj in enumerate(source()):
        # 'module's have a single __dict__, which tends to refer to other
        # modules. As you start tracking into that, you end up getting into
        # reference cycles, etc, which generally ends up referencing every
        # object in memory.
        # 'frame' also tends to be self referential, and a single frame
        # ends up referencing the entire current state
        # 'type' generally is self referential through several attributes.
        # __bases__ means we recurse all the way up to object, and object
        # has __subclasses__, which means we recurse down into all types.
        # In general, not helpful for debugging memory consumption
        if show_progress and idx & 0x1ff == 0:
            sys.stderr.write('finding expensive refs... %8d / %8d    \r'
                             % (idx, total_steps))
        if obj.type_str in ('module', 'frame', 'type'):
            noref_objs.add(obj.address)
        if obj.type_str == '_LRUNode':
            lru_objs.add(obj.address)
        if obj.address == 0:
            seen_zero = True
    # Second pass, any object which refers to something in noref_objs will
    # have that reference removed, and replaced with the null_memobj
    num_expensive = len(noref_objs)
    null_memobj = _loader._MemObjectProxy_from_args(0, '<ex-reference>', 0, [])
    if not seen_zero:
        yield (True, null_memobj)
    if show_progress and total_objs == 0:
        # idx is the last 0-based index seen, so the number of objects is
        # idx + 1 (and 0 when the source was empty).
        total_objs = idx + 1
        total_steps = total_objs * 2
    for idx, obj in enumerate(source()):
        if show_progress and idx & 0x1ff == 0:
            # The second pass continues the progress count where the first
            # pass left off, hence the total_objs offset.
            sys.stderr.write('removing %d expensive refs... %8d / %8d   \r'
                             % (num_expensive, idx + total_objs,
                                total_steps))
        if obj.type_str == 'function':
            # Functions have a reference to 'globals' which is not very
            # helpful for having a clear understanding of what is going on
            # especially since the function itself is in its own globals
            # XXX: This is probably not a guaranteed order, but currently
            #       func_traverse returns:
            #   func_code, func_globals, func_module, func_defaults,
            #   func_doc, func_name, func_dict, func_closure
            # We want to remove the reference to globals and module
            refs = list(obj.children)
            # Drop positions 1 and 2, and point at address 0 instead.
            obj.children = refs[:1] + refs[3:] + [0]
            yield (True, obj)
            continue
        elif obj.type_str == '_LRUNode':
            # We remove the 'sideways' references
            obj.children = [ref for ref in obj.children
                                 if ref not in lru_objs]
            yield (True, obj)
            continue
        # Cheap scan first: most objects have no expensive references, and
        # for those we avoid building a new list at all.
        for ref in obj.children:
            if ref in noref_objs:
                break
        else:
            # No bad references, keep going
            yield (False, obj)
            continue
        new_ref_list = [ref for ref in obj.children
                             if ref not in noref_objs]
        # A single reference to address 0 stands in for everything removed.
        new_ref_list.append(0)
        obj.children = new_ref_list
        yield (True, obj)
    if show_progress:
        sys.stderr.write('removed %d expensive refs from %d objs%s\n'
                         % (num_expensive, total_objs, ' '*20))