def test_select(self):
    # Exercise "heap select": with no query, with a query containing a
    # syntax error, and with a query naming an unknown attribute; finally
    # check that ply left no debug files behind.

    # Inferior program: three add_malloc(1024) calls, then a breakpoint.
    src = TestSource()
    for _ in range(3):
        src.add_malloc(1024)
    src.add_breakpoint()
    source = src.as_c_source()

    # Ensure that "heap select" with no query does something sane
    out = self.program_test('test_select', source,
                            commands=['run',
                                      'heap select'])
    tables = ParsedTable.parse_lines(out)
    select_out = tables[0]

    # The "heap select" command should select all blocks:
    self.assertEqual(select_out.colnames,
                     ('Start', 'End', 'Domain', 'Kind', 'Detail', 'Hexdump'))
    self.assertEqual(len(select_out.rows), 3)

    # Test that syntax errors are well handled:
    out = self.program_test('test_select', source,
                            commands=['run',
                                      'heap select I AM A SYNTAX ERROR'])
    # NOTE(review): the expected layout (one item per line, carets placed
    # under the offending token) should be confirmed against the parser's
    # actual error output.
    errmsg = '''
Parse error at "AM":
I AM A SYNTAX ERROR
  ^^
'''
    if errmsg not in out:
        self.fail('Did not find expected "ParseError" message in:\n%s' % out)

    # Test that unknown attributes are well-handled:
    out = self.program_test('test_select', source,
                            commands=['run',
                                      'heap select NOT_AN_ATTRIBUTE > 42'])
    errmsg = '''
Unknown attribute "NOT_AN_ATTRIBUTE" (supported are domain,kind,detail,addr,start,size) at "NOT_AN_ATTRIBUTE":
NOT_AN_ATTRIBUTE > 42
^^^^^^^^^^^^^^^^
'''
    if errmsg not in out:
        self.fail('Did not find expected "Unknown attribute" error message in:\n%s' % out)

    # Ensure that ply did not create debug files (ticket #12)
    for filename in ('parser.out', 'parsetab.py'):
        if os.path.exists(filename):
            self.fail('Unexpectedly found file %r' % filename)
def test_assertions(self):
    # Ensure that the domain-specific assertions work: each must pass for
    # a row that is present in the parsed table and raise RowNotFound for
    # one that is absent.
    tables = ParsedTable.parse_lines(test_table)
    self.assertEqual(len(tables), 2)
    pt = tables[0]

    # assertHasRow: positive case, then negative case.
    self.assertHasRow(pt, [('Domain', 'python'), ('Kind', 'str')])
    self.assertRaises(RowNotFound,
                      self.assertHasRow, pt, [('Domain', 'ruby')])

    # assertFoundCategory: positive case, then negative case.
    self.assertFoundCategory(pt, 'python', 'str')
    self.assertRaises(RowNotFound,
                      self.assertFoundCategory, pt, 'ruby', 'class')
def test_assertions(self):
    # Ensure that the domain-specific assertions work: each must pass for
    # a row that is present in the parsed table and raise RowNotFound for
    # one that is absent.
    tables = ParsedTable.parse_lines(test_table)
    self.assertEqual(len(tables), 2)
    pt = tables[0]

    # assertHasRow: positive case, then negative case.
    self.assertHasRow(pt, [('Domain', 'python'), ('Kind', 'str')])
    self.assertRaises(RowNotFound,
                      self.assertHasRow, pt, [('Domain', 'ruby')])

    # assertFoundCategory: positive case, then negative case.
    self.assertFoundCategory(pt, 'python', 'str')
    self.assertRaises(RowNotFound,
                      self.assertFoundCategory, pt, 'ruby', 'class')
def test_select(self):
    # Exercise "heap select": with no query, with a query containing a
    # syntax error, and with a query naming an unknown attribute; finally
    # check that ply left no debug files behind.

    # Inferior program: three add_malloc(1024) calls, then a breakpoint.
    src = TestSource()
    for _ in range(3):
        src.add_malloc(1024)
    src.add_breakpoint()
    source = src.as_c_source()

    # Ensure that "heap select" with no query does something sane
    out = self.program_test('test_select', source,
                            commands=['run',
                                      'heap select'])
    tables = ParsedTable.parse_lines(out)
    select_out = tables[0]

    # The "heap select" command should select all blocks:
    self.assertEqual(select_out.colnames,
                     ('Start', 'End', 'Domain', 'Kind', 'Detail', 'Hexdump'))
    self.assertEqual(len(select_out.rows), 3)

    # Test that syntax errors are well handled:
    out = self.program_test('test_select', source,
                            commands=['run',
                                      'heap select I AM A SYNTAX ERROR'])
    # NOTE(review): the expected layout (one item per line, carets placed
    # under the offending token) should be confirmed against the parser's
    # actual error output.
    errmsg = '''
Parse error at "AM":
I AM A SYNTAX ERROR
  ^^
'''
    if errmsg not in out:
        self.fail('Did not find expected "ParseError" message in:\n%s' % out)

    # Test that unknown attributes are well-handled:
    out = self.program_test('test_select', source,
                            commands=['run',
                                      'heap select NOT_AN_ATTRIBUTE > 42'])
    errmsg = '''
Unknown attribute "NOT_AN_ATTRIBUTE" (supported are domain,kind,detail,addr,start,size) at "NOT_AN_ATTRIBUTE":
NOT_AN_ATTRIBUTE > 42
^^^^^^^^^^^^^^^^
'''
    if errmsg not in out:
        self.fail('Did not find expected "Unknown attribute" error message in:\n%s' % out)

    # Ensure that ply did not create debug files (ticket #12)
    for filename in ('parser.out', 'parsetab.py'):
        if os.path.exists(filename):
            self.fail('Unexpectedly found file %r' % filename)
def test_random_allocations(self):
    # Fuzz-testing: lots of allocations (of various sizes)
    # and deallocations
    src = TestSource()
    sizes = {}           # variable name -> size passed to add_malloc/add_realloc
    live_blocks = set()  # variable names that have not been freed or realloc'd away
    for _ in range(100):
        action = random.randint(1, 100)

        if action <= 70:
            # 70% chance of malloc:
            size = self.random_size()
            varname = src.add_malloc(size, debug=True)
            sizes[varname] = size
            live_blocks.add(varname)
        elif live_blocks:
            # The remaining actions need an existing block to operate on.
            if action <= 80:
                # 10% chance of realloc (actions 71..80):
                size = self.random_size()
                # random.sample() no longer accepts a set (TypeError since
                # Python 3.11), so pick from a list copy instead.
                old_varname = random.choice(list(live_blocks))
                live_blocks.remove(old_varname)
                new_varname = src.add_realloc(old_varname, size, debug=True)
                sizes[new_varname] = size
                live_blocks.add(new_varname)
            else:
                # 20% chance of freeing something (actions 81..100):
                varname = random.choice(list(live_blocks))
                live_blocks.remove(varname)
                src.add_free(varname)

        # One breakpoint per iteration, matching the 100 "heap select"
        # commands issued below.
        src.add_breakpoint()

    source = src.as_c_source()
    out = self.program_test('test_random_allocations', source,
                            commands=(['run']
                                      + ['heap select', 'cont'] * 100))

    # We have 100 states of the inferior process; check that each was
    # reported as we expected it to be:
    tables = ParsedTable.parse_lines(out)
    self.assertEqual(len(tables), 100)
    for heap_select_out in tables:
        # Collect column 0 of every row of this table.
        # NOTE(review): nothing visible here compares reported_addrs (or
        # sizes) against expectations - confirm whether further checks
        # were meant to follow.
        reported_addrs = {heap_select_out.get_cell(0, y)
                          for y in range(len(heap_select_out.rows))}
def test_select_by_category(self):
    # Run "heap select" with a domain/kind/size filter against a python
    # process stopped at builtin_id, and check that every returned row
    # matches the requested domain and kind.
    out = self.command_test(
        ['python', '-c', 'id(42)'],
        commands=['set breakpoint pending yes',
                  'break builtin_id',
                  'run',
                  'heap select domain="python" and kind="str" and size > 512'],
        breakpoint='builtin_id')

    tables = ParsedTable.parse_lines(out)
    select_out = tables[0]

    # Ensure that the filtering mechanism worked:
    if len(select_out.rows) < 10:
        self.fail("Didn't find any large python strings (has something gone wrong?) in: %s" % select_out)
    for row in select_out.rows:
        # presumably columns 2 and 3 are "Domain" and "Kind" - confirm
        # against the "heap select" column layout
        self.assertEqual(row[2], 'python')
        self.assertEqual(row[3], 'str')
def test_random_allocations(self):
    # Fuzz-testing: lots of allocations (of various sizes)
    # and deallocations
    src = TestSource()
    sizes = {}           # variable name -> size passed to add_malloc/add_realloc
    live_blocks = set()  # variable names that have not been freed or realloc'd away
    for _ in range(100):
        action = random.randint(1, 100)

        if action <= 70:
            # 70% chance of malloc:
            size = self.random_size()
            varname = src.add_malloc(size, debug=True)
            sizes[varname] = size
            live_blocks.add(varname)
        elif live_blocks:
            # The remaining actions need an existing block to operate on.
            if action <= 80:
                # 10% chance of realloc (actions 71..80):
                size = self.random_size()
                # random.sample() no longer accepts a set (TypeError since
                # Python 3.11), so pick from a list copy instead.
                old_varname = random.choice(list(live_blocks))
                live_blocks.remove(old_varname)
                new_varname = src.add_realloc(old_varname, size, debug=True)
                sizes[new_varname] = size
                live_blocks.add(new_varname)
            else:
                # 20% chance of freeing something (actions 81..100):
                varname = random.choice(list(live_blocks))
                live_blocks.remove(varname)
                src.add_free(varname)

        # One breakpoint per iteration, matching the 100 "heap select"
        # commands issued below.
        src.add_breakpoint()

    source = src.as_c_source()
    out = self.program_test('test_random_allocations', source,
                            commands=(['run']
                                      + ['heap select', 'cont'] * 100))

    # We have 100 states of the inferior process; check that each was
    # reported as we expected it to be:
    tables = ParsedTable.parse_lines(out)
    self.assertEqual(len(tables), 100)
    for heap_select_out in tables:
        # Collect column 0 of every row of this table.
        # NOTE(review): nothing visible here compares reported_addrs (or
        # sizes) against expectations - confirm whether further checks
        # were meant to follow.
        reported_addrs = {heap_select_out.get_cell(0, y)
                          for y in range(len(heap_select_out.rows))}
def test_select_by_size(self):
    # Check that "heap select" can filter blocks by size, using a known
    # distribution of allocation sizes.
    src = TestSource()
    # Allocate ten 1kb blocks, nine 2kb blocks, etc, down to one 10kb
    # block so that we can easily query them by size:
    for i in range(10):
        size = 1024 * (i + 1)
        for _ in range(10 - i):
            src.add_malloc(size)
    src.add_breakpoint()
    source = src.as_c_source()
    out = self.program_test(
        'test_select_by_size', source,
        commands=['run',
                  'heap',
                  'heap select size >= 10240',  # (parsed as "largest_out" below)
                  'heap select size < 2048',  # (parsed as "smallest_out" below)
                  'heap select size >= 4096 and size < 8192',  # (parsed as "middle_out" below)
                  ])
    tables = ParsedTable.parse_lines(out)
    heap_out, largest_out, smallest_out, middle_out = tables[:4]

    # The "heap" command should find all the allocations
    # (10 + 9 + ... + 1 = 55):
    self.assertHasRow(heap_out,
                      [('Detail', 'TOTAL'),
                       ('Count', 55)])

    # The query for the largest should find just one allocation:
    self.assertEqual(len(largest_out.rows), 1)

    # The query for the smallest should find ten allocations:
    self.assertEqual(len(smallest_out.rows), 10)

    # The middle query [4096, 8192) should capture the following
    # allocations:
    #   7 of 4096 (4*1024), 6 of 5120 (5*1024),
    #   5 of 6144 (6*1024) and 4 of 7168 (7*1024)
    # giving a total count of 7+6+5+4 = 22
    self.assertEqual(len(middle_out.rows), 22)
def test_gobject(self):
    # Run "heap" on gtk-demo stopped at gtk_main and check that GLib/GTK
    # and X11 allocations are categorized.
    gdb_commands = [
        'set breakpoint pending yes',
        'set environment G_SLICE=always-malloc',  # for now
        'break gtk_main',
        'run',
        'heap',
    ]
    out = self.command_test(['gtk-demo'], commands=gdb_commands)
    # print out

    heap_out = ParsedTable.parse_lines(out)[0]

    expected_categories = (
        # Ensure that instances of GObject classes are categorized:
        ('GType', 'GtkTreeView'),
        ('GType', 'GtkLabel'),
        # Ensure that instances of fundamental boxed types are categorized:
        ('GType', 'gchar'),
        ('GType', 'guint'),
        # Ensure that the code detected buffers used by the GLib/GTK types:
        ('GType', 'GdkPixbuf pixels', '107w x 140h'),
        # GdkImage -> X11 Images -> data:
        ('GType', 'GdkImage'),
        ('X11', 'Image'),
    )
    for category in expected_categories:
        self.assertFoundCategory(heap_out, *category)

    if False:
        # Only seen whilst using X forwarded over ssh:
        self.assertFoundCategory(heap_out, 'X11', 'Image data')
    # In both above rows, "Detail" contains the exact dimensions, but these
    # seem to vary with the resolution of the display the test is run
    # against

    # FreeType:
    # These seem to be highly dependent on the environment; I originally
    # developed this whilst using X forwarded over ssh
    if False:
        self.assertFoundCategory(heap_out, 'GType', 'PangoCairoFcFontMap')
        self.assertFoundCategory(heap_out, 'FreeType', 'Library')
        self.assertFoundCategory(heap_out, 'FreeType', 'raster_pool')
def test_cplusplus(self):
    '''Verify that we can detect and categorize instances of C++ classes'''
    # Note that C++ detection is currently disabled due to a bug in execution capture
    program = TestSource()
    program.decls += '''
class Foo {
public:
    virtual ~Foo() {}
    int f1;
    int f2;
};

class Bar : Foo {
public:
    virtual ~Bar() {}
    int f1;
    // Ensure that Bar has a different allocated size to Foo, on every arch:
    int buffer[256];
};
'''

    # One Foo per iteration, plus one Bar on every odd iteration.
    for n in range(100):
        program.add_line('{Foo *f = new Foo();}')
        if n % 2:
            program.add_line('{Bar *b = new Bar();}')
    program.add_breakpoint()

    source = program.as_c_source()
    gdb_commands = ['run', 'heap sizes', 'heap']
    out = self.program_test('test_cplusplus', source,
                            is_cplusplus=True,
                            commands=gdb_commands)
    tables = ParsedTable.parse_lines(out)
    heap_sizes_out = tables[0]
    heap_out = tables[1]

    # We ought to have 150 live blocks on the heap:
    total_row = [('Detail', 'TOTAL'), ('Count', 150)]
    self.assertHasRow(heap_out, total_row)

    # Use the differing counts of the blocks to locate the objects
    # FIXME: change the "Domain" values below and add "Kind" once C++
    # identification is re-enabled:
    for expected_count in (100, 50):
        self.assertHasRow(heap_out,
                          [('Count', expected_count),
                           ('Domain', 'uncategorized')])
def test_select_by_size(self):
    # Check that "heap select" can filter blocks by size, using a known
    # distribution of allocation sizes.
    src = TestSource()
    # Allocate ten 1kb blocks, nine 2kb blocks, etc, down to one 10kb
    # block so that we can easily query them by size:
    for i in range(10):
        size = 1024 * (i + 1)
        for _ in range(10 - i):
            src.add_malloc(size)
    src.add_breakpoint()
    source = src.as_c_source()
    out = self.program_test(
        'test_select_by_size', source,
        commands=['run',
                  'heap',
                  'heap select size >= 10240',  # (parsed as "largest_out" below)
                  'heap select size < 2048',  # (parsed as "smallest_out" below)
                  'heap select size >= 4096 and size < 8192',  # (parsed as "middle_out" below)
                  ])
    tables = ParsedTable.parse_lines(out)
    heap_out, largest_out, smallest_out, middle_out = tables[:4]

    # The "heap" command should find all the allocations
    # (10 + 9 + ... + 1 = 55):
    self.assertHasRow(heap_out,
                      [('Detail', 'TOTAL'),
                       ('Count', 55)])

    # The query for the largest should find just one allocation:
    self.assertEqual(len(largest_out.rows), 1)

    # The query for the smallest should find ten allocations:
    self.assertEqual(len(smallest_out.rows), 10)

    # The middle query [4096, 8192) should capture the following
    # allocations:
    #   7 of 4096 (4*1024), 6 of 5120 (5*1024),
    #   5 of 6144 (6*1024) and 4 of 7168 (7*1024)
    # giving a total count of 7+6+5+4 = 22
    self.assertEqual(len(middle_out.rows), 22)
def test_gobject(self):
    # Run "heap" on gtk-demo stopped at gtk_main and check that GLib/GTK
    # and X11 allocations are categorized.
    gdb_commands = [
        'set breakpoint pending yes',
        'set environment G_SLICE=always-malloc',  # for now
        'break gtk_main',
        'run',
        'heap',
    ]
    out = self.command_test(['gtk-demo'], commands=gdb_commands)
    # print out

    heap_out = ParsedTable.parse_lines(out)[0]

    expected_categories = (
        # Ensure that instances of GObject classes are categorized:
        ('GType', 'GtkTreeView'),
        ('GType', 'GtkLabel'),
        # Ensure that instances of fundamental boxed types are categorized:
        ('GType', 'gchar'),
        ('GType', 'guint'),
        # Ensure that the code detected buffers used by the GLib/GTK types:
        ('GType', 'GdkPixbuf pixels', '107w x 140h'),
        # GdkImage -> X11 Images -> data:
        ('GType', 'GdkImage'),
        ('X11', 'Image'),
    )
    for category in expected_categories:
        self.assertFoundCategory(heap_out, *category)

    if False:
        # Only seen whilst using X forwarded over ssh:
        self.assertFoundCategory(heap_out, 'X11', 'Image data')
    # In both above rows, "Detail" contains the exact dimensions, but these
    # seem to vary with the resolution of the display the test is run
    # against

    # FreeType:
    # These seem to be highly dependent on the environment; I originally
    # developed this whilst using X forwarded over ssh
    if False:
        self.assertFoundCategory(heap_out, 'GType', 'PangoCairoFcFontMap')
        self.assertFoundCategory(heap_out, 'FreeType', 'Library')
        self.assertFoundCategory(heap_out, 'FreeType', 'raster_pool')
def test_select_by_category(self):
    # Run "heap select" with a domain/kind/size filter against a python
    # process stopped at builtin_id, and check that every returned row
    # matches the requested domain and kind.
    out = self.command_test(
        ['python', '-c', 'id(42)'],
        commands=['set breakpoint pending yes',
                  'break builtin_id',
                  'run',
                  'heap select domain="python" and kind="str" and size > 512'],
        breakpoint='builtin_id')

    tables = ParsedTable.parse_lines(out)
    select_out = tables[0]

    # Ensure that the filtering mechanism worked:
    if len(select_out.rows) < 10:
        self.fail("Didn't find any large python strings (has something gone wrong?) in: %s" % select_out)
    for row in select_out.rows:
        # presumably columns 2 and 3 are "Domain" and "Kind" - confirm
        # against the "heap select" column layout
        self.assertEqual(row[2], 'python')
        self.assertEqual(row[3], 'str')
def test_pypy(self):
    # Try to investigate memory usage of pypy-c
    # Developed using pypy-1.4.1 as packaged on Fedora.
    #
    # In order to get meaningful data, let's try to trap the exit point
    # of pypy-c within gdb.
    #
    # For now, lets try to put a breakpoint in this location within the
    # generated "pypy_g_entry_point" C function:
    #   print_stats:158 :  debug_stop("jit-summary")
    inferior_argv = ['pypy', 'object-sizes.py']
    gdb_commands = [
        'set breakpoint pending yes',
        'break pypy_debug_stop',
        # Only stop when the category argument is "jit-summary":
        'condition 1 0==strcmp(category, "jit-summary")',
        'run',
        'heap',
    ]
    out = self.command_test(inferior_argv, commands=gdb_commands)

    # First parsed table, i.e. the output of the "heap" command above.
    tables = ParsedTable.parse_lines(out)
    select_out = tables[0]
def test_pypy(self):
    # Try to investigate memory usage of pypy-c
    # Developed using pypy-1.4.1 as packaged on Fedora.
    #
    # In order to get meaningful data, let's try to trap the exit point
    # of pypy-c within gdb.
    #
    # For now, lets try to put a breakpoint in this location within the
    # generated "pypy_g_entry_point" C function:
    #   print_stats:158 :  debug_stop("jit-summary")
    inferior_argv = ['pypy', 'object-sizes.py']
    gdb_commands = [
        'set breakpoint pending yes',
        'break pypy_debug_stop',
        # Only stop when the category argument is "jit-summary":
        'condition 1 0==strcmp(category, "jit-summary")',
        'run',
        'heap',
    ]
    out = self.command_test(inferior_argv, commands=gdb_commands)

    # First parsed table, i.e. the output of the "heap" command above.
    tables = ParsedTable.parse_lines(out)
    select_out = tables[0]
def _impl_test_python(self, pyruntime, py3k):
    # Test that we can debug CPython's memory usage, for a given runtime
    #
    # pyruntime: the python executable to launch under gdb
    # py3k:      true when that runtime is Python 3; selects which
    #            version-specific categories are checked below

    # Invoke a test python script, stopping at a breakpoint
    out = self.command_test(
        [pyruntime, 'object-sizes.py'],
        commands=['set breakpoint pending yes',
                  'break builtin_id',
                  'run',
                  'heap cpython-allocators',
                  'heap',
                  'heap select kind="PyListObject ob_item table"'],
        breakpoint='builtin_id')

    # Re-enable this for debugging:
    # print out

    # One table per heap command, in the order issued above.
    tables = ParsedTable.parse_lines(out)

    # Verify that "cpython-allocators" works:
    allocators_out = tables[0]
    self.assertEqual(allocators_out.colnames,
                     ('struct arena_object*',
                      '256KB buffer location',
                      'Free pools'))
    # print allocators_out

    heap_out = tables[1]

    # Verify that "select" works for a category that's only detectable
    # w.r.t. other categories:
    select_out = tables[2]
    # print select_out
    self.assertHasRow(select_out,
                      kvs=[('Domain', 'cpython'),
                           ('Kind', 'PyListObject ob_item table')])

    # Ensure that the code detected instances of various python types we
    # expect to be present:
    for kind in ('str', 'list', 'tuple', 'dict', 'type', 'code',
                 'set', 'frozenset', 'function', 'module', 'frame'):
        self.assertFoundCategory(heap_out, 'python', kind)

    if py3k:
        self.assertFoundCategory(heap_out, 'python', 'bytes')
    else:
        self.assertFoundCategory(heap_out, 'python', 'unicode')

    # Ensure that the blocks of int allocations are detected:
    if not py3k:
        self.assertFoundCategory(heap_out, 'cpython', '_intblock', '')

    # Ensure that bytecode "strings" are marked as such:
    self.assertFoundCategory(heap_out, 'python', 'str', 'bytecode')  # FIXME

    # Ensure that old-style classes are printed with a meaningful name
    # (i.e. not just "type"):
    if not py3k:
        for clsname in ('OldStyle', 'OldStyleManyAttribs'):
            self.assertFoundCategory(heap_out,
                                     'python', clsname, 'old-style')

            # ...and that their instance dicts are marked:
            self.assertFoundCategory(heap_out,
                                     'cpython', 'PyDictObject',
                                     '%s.__dict__' % clsname)

        # ...and that an old-style instance with enough attributes to require a
        # separate PyDictEntry buffer for its __dict__ has that buffer marked
        # with the typename:
        self.assertFoundCategory(heap_out,
                                 'cpython', 'PyDictEntry table',
                                 'OldStyleManyAttribs.__dict__')

    # Likewise for new-style classes:
    for clsname in ('NewStyle', 'NewStyleManyAttribs'):
        self.assertHasRow(heap_out,
                          [('Domain', 'python'),
                           ('Kind', clsname),
                           ('Detail', None)])
        self.assertFoundCategory(heap_out,
                                 'python', 'dict', '%s.__dict__' % clsname)
    self.assertFoundCategory(heap_out,
                             'cpython', 'PyDictEntry table',
                             'NewStyleManyAttribs.__dict__')

    # Ensure that the code detected buffers used by python types:
    for kind in ('PyDictEntry table',
                 'PyListObject ob_item table',
                 'PySetObject setentry table',
                 'PyUnicodeObject buffer'):
        self.assertFoundCategory(heap_out, 'cpython', kind)

    # and of other types:
    self.assertFoundCategory(heap_out, 'C', 'string data')
    self.assertFoundCategory(heap_out, 'pyarena', 'pool_header overhead')

    # Ensure that the "interned" table is identified (it's typically
    # at least 200k on a 64-bit build):
    self.assertHasRow(heap_out,
                      [('Domain', 'cpython'),
                       ('Kind', 'PyDictEntry table'),
                       ('Detail', 'interned'),
                       ('Count', 1)])

    # Ensure that we detect python sqlite3 objects:
    for kind in ('sqlite3.Connection',
                 'sqlite3.Statement',
                 'sqlite3.Cache'):
        self.assertFoundCategory(heap_out, 'python', kind)

    # ...and that we detect underlying sqlite3 buffers:
    for kind in ('sqlite3', 'sqlite3_stmt'):
        self.assertFoundCategory(heap_out, 'sqlite3', kind)
def _impl_test_python(self, pyruntime, py3k):
    # Test that we can debug CPython's memory usage, for a given runtime
    #
    # pyruntime: the python executable to launch under gdb
    # py3k:      true when that runtime is Python 3; selects which
    #            version-specific categories are checked below

    # Invoke a test python script, stopping at a breakpoint
    out = self.command_test(
        [pyruntime, 'object-sizes.py'],
        commands=['set breakpoint pending yes',
                  'break builtin_id',
                  'run',
                  'heap cpython-allocators',
                  'heap',
                  'heap select kind="PyListObject ob_item table"'],
        breakpoint='builtin_id')

    # Re-enable this for debugging:
    # print out

    # One table per heap command, in the order issued above.
    tables = ParsedTable.parse_lines(out)

    # Verify that "cpython-allocators" works:
    allocators_out = tables[0]
    self.assertEqual(allocators_out.colnames,
                     ('struct arena_object*',
                      '256KB buffer location',
                      'Free pools'))
    # print allocators_out

    heap_out = tables[1]

    # Verify that "select" works for a category that's only detectable
    # w.r.t. other categories:
    select_out = tables[2]
    # print select_out
    self.assertHasRow(select_out,
                      kvs=[('Domain', 'cpython'),
                           ('Kind', 'PyListObject ob_item table')])

    # Ensure that the code detected instances of various python types we
    # expect to be present:
    for kind in ('str', 'list', 'tuple', 'dict', 'type', 'code',
                 'set', 'frozenset', 'function', 'module', 'frame'):
        self.assertFoundCategory(heap_out, 'python', kind)

    if py3k:
        self.assertFoundCategory(heap_out, 'python', 'bytes')
    else:
        self.assertFoundCategory(heap_out, 'python', 'unicode')

    # Ensure that the blocks of int allocations are detected:
    if not py3k:
        self.assertFoundCategory(heap_out, 'cpython', '_intblock', '')

    # Ensure that bytecode "strings" are marked as such:
    self.assertFoundCategory(heap_out, 'python', 'str', 'bytecode')  # FIXME

    # Ensure that old-style classes are printed with a meaningful name
    # (i.e. not just "type"):
    if not py3k:
        for clsname in ('OldStyle', 'OldStyleManyAttribs'):
            self.assertFoundCategory(heap_out,
                                     'python', clsname, 'old-style')

            # ...and that their instance dicts are marked:
            self.assertFoundCategory(heap_out,
                                     'cpython', 'PyDictObject',
                                     '%s.__dict__' % clsname)

        # ...and that an old-style instance with enough attributes to require a
        # separate PyDictEntry buffer for its __dict__ has that buffer marked
        # with the typename:
        self.assertFoundCategory(heap_out,
                                 'cpython', 'PyDictEntry table',
                                 'OldStyleManyAttribs.__dict__')

    # Likewise for new-style classes:
    for clsname in ('NewStyle', 'NewStyleManyAttribs'):
        self.assertHasRow(heap_out,
                          [('Domain', 'python'),
                           ('Kind', clsname),
                           ('Detail', None)])
        self.assertFoundCategory(heap_out,
                                 'python', 'dict', '%s.__dict__' % clsname)
    self.assertFoundCategory(heap_out,
                             'cpython', 'PyDictEntry table',
                             'NewStyleManyAttribs.__dict__')

    # Ensure that the code detected buffers used by python types:
    for kind in ('PyDictEntry table',
                 'PyListObject ob_item table',
                 'PySetObject setentry table',
                 'PyUnicodeObject buffer'):
        self.assertFoundCategory(heap_out, 'cpython', kind)

    # and of other types:
    self.assertFoundCategory(heap_out, 'C', 'string data')
    self.assertFoundCategory(heap_out, 'pyarena', 'pool_header overhead')

    # Ensure that the "interned" table is identified (it's typically
    # at least 200k on a 64-bit build):
    self.assertHasRow(heap_out,
                      [('Domain', 'cpython'),
                       ('Kind', 'PyDictEntry table'),
                       ('Detail', 'interned'),
                       ('Count', 1)])

    # Ensure that we detect python sqlite3 objects:
    for kind in ('sqlite3.Connection',
                 'sqlite3.Statement',
                 'sqlite3.Cache'):
        self.assertFoundCategory(heap_out, 'python', kind)

    # ...and that we detect underlying sqlite3 buffers:
    for kind in ('sqlite3', 'sqlite3_stmt'):
        self.assertFoundCategory(heap_out, 'sqlite3', kind)