# Shared imports for the excerpts below, reproduced from meliae's scanner and
# test modules so the snippets stand alone:
import gc
import tempfile
import types

import six

from gc import get_referents
from meliae import _intset, _scanner


def dump_all_referenced(outf, obj, is_pending=False):
    """Recursively dump everything that is referenced from obj."""
    if isinstance(outf, six.string_types):
        outf = open(outf, 'wb')
    if is_pending:
        pending = obj
    else:
        pending = [obj]
    last_offset = len(pending) - 1
    # TODO: Instead of using an IDSet, we could use a BloomFilter. It would
    #       mean some objects may not get dumped (blooms answer "you are
    #       definitely not present" exactly, but only "you might already be
    #       present"; collisions cause false positives).
    #       However, you can get by with 8-10 bits for a 1% FPR, rather than
    #       using 32/64-bit pointers + overhead for avoiding hash collisions.
    #       So on 64-bit we drop from 16 bytes/object to 1...
    seen = _intset.IDSet()
    if is_pending:
        seen.add(id(pending))
    while last_offset >= 0:
        next = pending[last_offset]
        last_offset -= 1
        id_next = id(next)
        if id_next in seen:
            continue
        seen.add(id_next)
        # We will recurse here, so tell dump_object_info to not recurse
        _scanner.dump_object_info(outf, next, recurse_depth=0)
        for ref in get_referents(next):
            if id(ref) not in seen:
                last_offset += 1
                if len(pending) > last_offset:
                    pending[last_offset] = ref
                else:
                    pending.append(ref)
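# A minimal usage sketch for dump_all_referenced, assuming the meliae package
# is importable. 'refs.dump' is an illustrative filename, not anything meliae
# prescribes; passing a string makes the function open it in binary mode.
nested = {'a': [1, 2, 3], 'b': ('x', 'y')}
dump_all_referenced('refs.dump', nested)
# The dump now holds one record for the dict, plus records for the list,
# the tuple, and every string/int they reference.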
def test_dump_large_int(self):
    t = tempfile.TemporaryFile(prefix='meliae-')
    t_file = getattr(t, 'file', t)
    try:
        _scanner.dump_object_info(t_file, 2**80)
    except OverflowError:
        self.fail('OverflowError raised')
# Python 3-only variant of dump_all_referenced, taking plain str for paths
# instead of six.string_types:
def dump_all_referenced(outf, obj, is_pending=False):
    """Recursively dump everything that is referenced from obj."""
    if isinstance(outf, str):
        outf = open(outf, 'wb')
    if is_pending:
        pending = obj
    else:
        pending = [obj]
    last_offset = len(pending) - 1
    # TODO: Instead of using an IDSet, we could use a BloomFilter. It would
    #       mean some objects may not get dumped (blooms answer "you are
    #       definitely not present" exactly, but only "you might already be
    #       present"; collisions cause false positives).
    #       However, you can get by with 8-10 bits for a 1% FPR, rather than
    #       using 32/64-bit pointers + overhead for avoiding hash collisions.
    #       So on 64-bit we drop from 16 bytes/object to 1...
    seen = _intset.IDSet()
    if is_pending:
        seen.add(id(pending))
    while last_offset >= 0:
        next = pending[last_offset]
        last_offset -= 1
        id_next = id(next)
        if id_next in seen:
            continue
        seen.add(id_next)
        # We will recurse here, so tell dump_object_info to not recurse
        _scanner.dump_object_info(outf, next, recurse_depth=0)
        for ref in get_referents(next):
            if id(ref) not in seen:
                last_offset += 1
                if len(pending) > last_offset:
                    pending[last_offset] = ref
                else:
                    pending.append(ref)
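# A sketch of the BloomFilter idea from the TODO above: approximate
# membership over object ids in ~10 bits each instead of a full pointer set.
# Everything here (IdBloom, expected, fp_rate) is illustrative and not part
# of meliae; false positives would mean some objects silently go undumped.
import math


class IdBloom(object):

    def __init__(self, expected, fp_rate=0.01):
        # Standard sizing: m = -n*ln(p)/ln(2)^2 bits, k = (m/n)*ln(2) probes.
        self.m = max(8, int(-expected * math.log(fp_rate)
                            / (math.log(2) ** 2)))
        self.k = max(1, int(round(self.m * math.log(2) / expected)))
        self.bits = bytearray((self.m + 7) // 8)

    def _probes(self, value):
        # Double hashing: derive k bit positions from two cheap mixes of the
        # value; 'or 1' keeps the stride nonzero.
        h1 = value % self.m
        h2 = (value * 0x9E3779B97F4A7C15 + 1) % self.m or 1
        for i in range(self.k):
            yield (h1 + i * h2) % self.m

    def add(self, value):
        for pos in self._probes(value):
            self.bits[pos >> 3] |= 1 << (pos & 7)

    def __contains__(self, value):
        return all(self.bits[pos >> 3] & (1 << (pos & 7))
                   for pos in self._probes(value))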
def assertDumpInfo(self, obj, nodump=None):
    t = tempfile.TemporaryFile(prefix='meliae-')
    # On some platforms TemporaryFile returns a wrapper object with 'file'
    # being the real object, on others, the returned object *is* the real
    # file object
    t_file = getattr(t, 'file', t)
    _scanner.dump_object_info(t_file, obj, nodump=nodump)
    t.seek(0)
    as_bytes = t.read()
    self.assertEqual(py_dump_object_info(obj, nodump=nodump), as_bytes)
    as_list = []
    _scanner.dump_object_info(as_list.append, obj, nodump=nodump)
    self.assertEqual(as_bytes, ''.join(as_list))
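# Hedged sketch of the callable output mode exercised above: in addition to
# file-like objects, _scanner.dump_object_info accepts any callable and hands
# it the serialized chunks. The dict literal is just example data.
chunks = []
_scanner.dump_object_info(chunks.append, {'key': 'value'}, recurse_depth=1)
dumped = ''.join(chunks)  # the same bytes a file target would have received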
def dump_gc_objects(outf, recurse_depth=1):
    """Dump everything that is available via gc.get_objects()."""
    if isinstance(outf, six.string_types):
        opened = True
        outf = open(outf, 'wb')
    else:
        opened = False
    # Get the list of everything before we start building new objects
    all_objs = gc.get_objects()
    # Dump out a few specific objects, so they don't get repeated forever
    nodump = [None, True, False]
    # In current versions of python, these are all pre-cached
    nodump.extend(range(-5, 256))
    nodump.extend([chr(c) for c in range(256)])
    # Use values()/type rather than the Python 2-only itervalues()/TypeType,
    # since this variant relies on six and must run on both major versions.
    nodump.extend(
        [t for t in types.__dict__.values() if type(t) is type])
    nodump.extend([set, dict])
    # Some very common interned strings
    nodump.extend(
        ('__doc__', 'self', 'operator', '__init__', 'codecs', '__new__',
         '__builtin__', '__builtins__', 'error', 'len', 'errors', 'keys',
         'None', '__module__', 'file', 'name', '', 'sys', 'True', 'False'))
    # Python 2's StandardError does not exist on Python 3; the py2-only
    # variant below still includes it.
    nodump.extend((BaseException, Exception, ValueError))
    for obj in nodump:
        _scanner.dump_object_info(outf, obj, nodump=None, recurse_depth=0)
    # Avoid dumping the all_objs list and this function as well. This helps
    # avoid getting a 'reference everything in existence' problem.
    nodump.append(dump_gc_objects)
    # This currently costs us ~16kB during dumping, but means we won't write
    # out those objects multiple times in the log file.
    # TODO: we might want to make nodump a variable-size dict, and add
    #       anything with ob_refcnt > 1000 or so.
    nodump = frozenset(nodump)
    for obj in all_objs:
        _scanner.dump_object_info(outf, obj, nodump=nodump,
                                  recurse_depth=recurse_depth)
    del all_objs[:]
    if opened:
        outf.close()
    else:
        outf.flush()
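# Typical end-to-end usage, assuming the meliae package is installed;
# 'memory.dump' is an illustrative path. dump_gc_objects writes one record
# per live object, and meliae.loader reads the dump back for analysis.
from meliae import loader

dump_gc_objects('memory.dump')
om = loader.load('memory.dump')
print(om.summarize())  # per-type object counts and byte totals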
# Python 2-only variant of dump_gc_objects (basestring/xrange/itervalues/
# StandardError instead of the six-based spellings above):
def dump_gc_objects(outf, recurse_depth=1):
    """Dump everything that is available via gc.get_objects()."""
    if isinstance(outf, basestring):
        opened = True
        outf = open(outf, 'wb')
    else:
        opened = False
    # Get the list of everything before we start building new objects
    all_objs = gc.get_objects()
    # Dump out a few specific objects, so they don't get repeated forever
    nodump = [None, True, False]
    # In current versions of python, these are all pre-cached
    nodump.extend(xrange(-5, 256))
    nodump.extend([chr(c) for c in xrange(256)])
    nodump.extend([t for t in types.__dict__.itervalues()
                   if type(t) is types.TypeType])
    nodump.extend([set, dict])
    # Some very common interned strings
    nodump.extend(('__doc__', 'self', 'operator', '__init__', 'codecs',
                   '__new__', '__builtin__', '__builtins__', 'error', 'len',
                   'errors', 'keys', 'None', '__module__', 'file', 'name',
                   '', 'sys', 'True', 'False'))
    nodump.extend((BaseException, Exception, StandardError, ValueError))
    for obj in nodump:
        _scanner.dump_object_info(outf, obj, nodump=None, recurse_depth=0)
    # Avoid dumping the all_objs list and this function as well. This helps
    # avoid getting a 'reference everything in existence' problem.
    nodump.append(dump_gc_objects)
    # This currently costs us ~16kB during dumping, but means we won't write
    # out those objects multiple times in the log file.
    # TODO: we might want to make nodump a variable-size dict, and add
    #       anything with ob_refcnt > 1000 or so.
    nodump = frozenset(nodump)
    for obj in all_objs:
        _scanner.dump_object_info(outf, obj, nodump=nodump,
                                  recurse_depth=recurse_depth)
    del all_objs[:]
    if opened:
        outf.close()
    else:
        outf.flush()