Example #1
def dump_all_referenced(outf, obj, is_pending=False):
    """Recursively dump everything that is referenced from obj."""
    if isinstance(outf, six.string_types):
        outf = open(outf, 'wb')
    if is_pending:
        pending = obj
    else:
        pending = [obj]
    last_offset = len(pending) - 1
    # TODO: Instead of using an IDSet, we could use a BloomFilter. It would
    #       mean some objects may not get dumped (blooms say "yes you
    #       definitely are not present", but only "you might already be
    #       present", collisions cause false positives.)
    #       However, you can get by with 8-10bits for a 1% FPR, rather than
    #       using 32/64-bit pointers + overhead for avoiding hash collisions.
    #       So on 64-bit we drop from 16bytes/object to 1...
    seen = _intset.IDSet()
    if is_pending:
        seen.add(id(pending))
    while last_offset >= 0:
        next = pending[last_offset]
        last_offset -= 1
        id_next = id(next)
        if id_next in seen:
            continue
        seen.add(id_next)
        # We will recurse here, so tell dump_object_info to not recurse
        _scanner.dump_object_info(outf, next, recurse_depth=0)
        for ref in get_referents(next):
            if id(ref) not in seen:
                last_offset += 1
                if len(pending) > last_offset:
                    pending[last_offset] = ref
                else:
                    pending.append(ref)
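The TODO in the function above sketches swapping the exact IDSet for a Bloom filter. Below is a minimal illustration of what such an id()-keyed filter could look like; the IdBloomFilter name and the double-hashing scheme are assumptions for this sketch, not part of meliae. With 10 bits per expected entry and round(10 * ln 2) = 7 probes, the false-positive rate works out to roughly 1%, matching the 8-10 bits quoted in the comment.

import math

class IdBloomFilter(object):
    """Approximate 'seen' set keyed on id() values (illustrative sketch).

    False positives are possible (an object may wrongly be treated as
    already seen and skipped); false negatives are not.
    """

    def __init__(self, expected_entries, bits_per_entry=10):
        self.num_bits = max(8, expected_entries * bits_per_entry)
        self.num_probes = max(1, int(round(bits_per_entry * math.log(2))))
        self._bits = bytearray((self.num_bits + 7) // 8)

    def _positions(self, obj_id):
        # Double hashing: derive num_probes bit positions from one integer.
        h1 = obj_id & 0xffffffffffffffff
        h2 = ((obj_id * 0x9e3779b97f4a7c15) & 0xffffffffffffffff) or 1
        for i in range(self.num_probes):
            yield (h1 + i * h2) % self.num_bits

    def add(self, obj_id):
        for pos in self._positions(obj_id):
            self._bits[pos >> 3] |= 1 << (pos & 7)

    def __contains__(self, obj_id):
        return all(self._bits[pos >> 3] & (1 << (pos & 7))
                   for pos in self._positions(obj_id))

In dump_all_referenced such a filter would stand in for _intset.IDSet(), trading exactness (the occasional object silently skipped) for roughly one byte of overhead per tracked object.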
Example #2
 def test_dump_large_int(self):
     t = tempfile.TemporaryFile(prefix='meliae-')
     t_file = getattr(t, 'file', t)
     try:
         _scanner.dump_object_info(t_file, 2**80)
     except OverflowError:
         self.fail('OverflowError raised')
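For context on the test above: 2**80 is large enough that converting it to a C integer would overflow, which is exactly the failure mode the test guards against. A quick sanity check, not part of the original test:

assert 2**80 > 2**64 - 1   # exceeds the largest unsigned 64-bit value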
Example #3
def dump_all_referenced(outf, obj, is_pending=False):
    """Recursively dump everything that is referenced from obj."""
    if isinstance(outf, str):
        outf = open(outf, 'wb')
    if is_pending:
        pending = obj
    else:
        pending = [obj]
    last_offset = len(pending) - 1
    # TODO: Instead of using an IDSet, we could use a BloomFilter. It would
    #       mean some objects may not get dumped (blooms say "yes you
    #       definitely are not present", but only "you might already be
    #       present", collisions cause false positives.)
    #       However, you can get by with 8-10bits for a 1% FPR, rather than
    #       using 32/64-bit pointers + overhead for avoiding hash collisions.
    #       So on 64-bit we drop from 16bytes/object to 1...
    seen = _intset.IDSet()
    if is_pending:
        seen.add(id(pending))
    while last_offset >= 0:
        next = pending[last_offset]
        last_offset -= 1
        id_next = id(next)
        if id_next in seen:
            continue
        seen.add(id_next)
        # We will recurse here, so tell dump_object_info to not recurse
        _scanner.dump_object_info(outf, next, recurse_depth=0)
        for ref in get_referents(next):
            if id(ref) not in seen:
                last_offset += 1
                if len(pending) > last_offset:
                    pending[last_offset] = ref
                else:
                    pending.append(ref)
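A minimal usage sketch for the function above; the sample object and file name are illustrative. When handed a string, dump_all_referenced opens the path itself in 'wb' mode, as its first lines show.

# Dump everything reachable from one sample object to a file.
sample = {'numbers': list(range(10)), 'name': 'example'}
dump_all_referenced('referenced.dump', sample)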
Example #4
 def assertDumpInfo(self, obj, nodump=None):
     t = tempfile.TemporaryFile(prefix='meliae-')
     # On some platforms TemporaryFile returns a wrapper object with 'file'
     # being the real object, on others, the returned object *is* the real
     # file object
     t_file = getattr(t, 'file', t)
     _scanner.dump_object_info(t_file, obj, nodump=nodump)
     t.seek(0)
     as_bytes = t.read()
     self.assertEqual(py_dump_object_info(obj, nodump=nodump), as_bytes)
     as_list = []
     _scanner.dump_object_info(as_list.append, obj, nodump=nodump)
     self.assertEqual(as_bytes, ''.join(as_list))
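assertDumpInfo above exercises dump_object_info with two kinds of sinks: a real file object and a plain callable (as_list.append). A small pure-Python sketch of that dispatch, using a hypothetical helper name that is not part of meliae's API:

def _sink_writer(sink):
    """Return a callable accepting one output chunk, for either sink style."""
    write = getattr(sink, 'write', None)
    if callable(write):
        return write        # file-like object: use its write method
    if callable(sink):
        return sink         # plain callable, e.g. a list's append
    raise TypeError('expected a file-like object or a callable, got %r'
                    % (sink,))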
Example #5
def dump_gc_objects(outf, recurse_depth=1):
    """Dump everything that is available via gc.get_objects().
    """
    if isinstance(outf, six.string_types):
        opened = True
        outf = open(outf, 'wb')
    else:
        opened = False
    # Get the list of everything before we start building new objects
    all_objs = gc.get_objects()
    # Dump out a few specific objects, so they don't get repeated forever
    nodump = [None, True, False]
    # In current versions of python, these are all pre-cached
    nodump.extend(range(-5, 256))
    nodump.extend([chr(c) for c in range(256)])
    nodump.extend(
        [t for t in types.__dict__.values() if type(t) is type])
    nodump.extend([set, dict])
    # Some very common interned strings
    nodump.extend(
        ('__doc__', 'self', 'operator', '__init__', 'codecs', '__new__',
         '__builtin__', '__builtins__', 'error', 'len', 'errors', 'keys',
         'None', '__module__', 'file', 'name', '', 'sys', 'True', 'False'))
    nodump.extend((BaseException, Exception, ValueError))
    for obj in nodump:
        _scanner.dump_object_info(outf, obj, nodump=None, recurse_depth=0)
    # Avoid dumping the all_objs list and this function as well. This helps
    # avoid getting a 'reference everything in existence' problem.
    nodump.append(dump_gc_objects)
    # This currently costs us ~16kB during dumping, but means we won't write
    # out those objects multiple times in the log file.
    # TODO: we might want to make nodump a variable-size dict, and add anything
    #       with ob_refcnt > 1000 or so.
    nodump = frozenset(nodump)
    for obj in all_objs:
        _scanner.dump_object_info(outf,
                                  obj,
                                  nodump=nodump,
                                  recurse_depth=recurse_depth)
    del all_objs[:]
    if opened:
        outf.close()
    else:
        outf.flush()
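A minimal usage sketch for dump_gc_objects above, assuming meliae's loader module is available to read the dump back; the file name is illustrative.

from meliae import loader

dump_gc_objects('gc-objects.dump')    # accepts a path (opened 'wb') or a file

om = loader.load('gc-objects.dump')   # parse the dump back for analysis
om.summarize()                        # per-type counts and memory totals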
Example #6
def dump_gc_objects(outf, recurse_depth=1):
    """Dump everything that is available via gc.get_objects().
    """
    if isinstance(outf, basestring):
        opened = True
        outf = open(outf, 'wb')
    else:
        opened = False
    # Get the list of everything before we start building new objects
    all_objs = gc.get_objects()
    # Dump out a few specific objects, so they don't get repeated forever
    nodump = [None, True, False]
    # In current versions of python, these are all pre-cached
    nodump.extend(xrange(-5, 256))
    nodump.extend([chr(c) for c in xrange(256)])
    nodump.extend([t for t in types.__dict__.itervalues()
                      if type(t) is types.TypeType])
    nodump.extend([set, dict])
    # Some very common interned strings
    nodump.extend(('__doc__', 'self', 'operator', '__init__', 'codecs',
                   '__new__', '__builtin__', '__builtins__', 'error', 'len',
                   'errors', 'keys', 'None', '__module__', 'file', 'name', '',
                   'sys', 'True', 'False'))
    nodump.extend((BaseException, Exception, StandardError, ValueError))
    for obj in nodump:
        _scanner.dump_object_info(outf, obj, nodump=None, recurse_depth=0)
    # Avoid dumping the all_objs list and this function as well. This helps
    # avoid getting a 'reference everything in existence' problem.
    nodump.append(dump_gc_objects)
    # This currently costs us ~16kB during dumping, but means we won't write
    # out those objects multiple times in the log file.
    # TODO: we might want to make nodump a variable-size dict, and add anything
    #       with ob_refcnt > 1000 or so.
    nodump = frozenset(nodump)
    for obj in all_objs:
        _scanner.dump_object_info(outf, obj, nodump=nodump,
                                  recurse_depth=recurse_depth)
    del all_objs[:]
    if opened:
        outf.close()
    else:
        outf.flush()
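The TODO above also floats the idea of skipping anything with a very high refcount. A rough sketch of that idea using sys.getrefcount; the helper name and the 1000 threshold are illustrative, not meliae code.

import sys

def extend_nodump_with_popular(nodump, all_objs, min_refcount=1000):
    # Objects referenced from very many places (interned strings, common
    # type objects, ...) would otherwise reappear as a referent of nearly
    # every object written to the log.
    for obj in all_objs:
        # sys.getrefcount() includes a couple of temporary references from
        # this call itself, which is negligible against the threshold.
        if sys.getrefcount(obj) > min_refcount:
            nodump.append(obj)
    return nodump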