def runTest(self):
  """Classifies a synthetic native heap and checks the aggregated results."""
  rule_tree = native_heap_classifier.LoadRules(_TEST_RULES)
  nheap = native_heap.NativeHeap()
  next_addr = 0
  for test_entry in _TEST_STACK_TRACES:
    alloc_size, frame_names = test_entry[0], test_entry[1]
    trace = stacktrace.Stacktrace()
    for frame_name in frame_names:
      # The concrete address value is irrelevant, it only must be distinct.
      next_addr += 4
      frame = stacktrace.Frame(next_addr)
      frame.SetSymbolInfo(symbol.Symbol(frame_name))
      trace.Add(frame)
    nheap.Add(native_heap.Allocation(
        size=alloc_size, count=1, stack_trace=trace))
  res = native_heap_classifier.Classify(nheap, rule_tree)

  def CheckResult(node, prefix):
    # Recursively walk the classification tree and match every node against
    # the golden |_EXPECTED_RESULTS| table (keyed by '::'-joined node paths).
    node_name = prefix + node.name
    self.assertIn(node_name, _EXPECTED_RESULTS)
    self.assertEqual(node.values, _EXPECTED_RESULTS[node_name])
    for child in node.children:
      CheckResult(child, node_name + '::')

  CheckResult(res.total, '')
def testStandardRuleParsingAndProcessing(self):
  """Classifies a synthetic heap using |_TEST_RULES| and verifies the output."""
  rule_tree = native_heap_classifier.LoadRules(_TEST_RULES)
  nheap = native_heap.NativeHeap()
  fake_addr = 0
  for test_entry in _TEST_STACK_TRACES:
    trace = stacktrace.Stacktrace()
    for sym_name, source_path in test_entry[1]:
      fake_addr += 4  # The actual address is irrelevant, just keep it unique.
      frame = stacktrace.Frame(fake_addr)
      frame.SetSymbolInfo(symbol.Symbol(sym_name, source_path))
      trace.Add(frame)
    nheap.Add(native_heap.Allocation(
        size=test_entry[0], count=1, stack_trace=trace))
  res = native_heap_classifier.Classify(nheap, rule_tree)
  self._CheckResult(res.total, '', _EXPECTED_RESULTS)
def testInferHeuristicRules(self):
  """Checks that rules inferred from a heap classify that same heap."""
  nheap = native_heap.NativeHeap()
  for entry_idx, (alloc_size, source_path) in enumerate(
      _HEURISTIC_TEST_STACK_TRACES):
    # Any distinct address works; mirror the 4, 8, 12, ... progression.
    fake_addr = (entry_idx + 1) * 4
    frame = stacktrace.Frame(fake_addr)
    frame.SetSymbolInfo(symbol.Symbol(str(fake_addr), source_path))
    trace = stacktrace.Stacktrace()
    for _ in xrange(10):  # Repeat the same stack frame 10 times.
      trace.Add(frame)
    nheap.Add(native_heap.Allocation(
        size=alloc_size, count=1, stack_trace=trace))
  rule_tree = native_heap_classifier.InferHeuristicRulesFromHeap(
      nheap, threshold=0.05)
  res = native_heap_classifier.Classify(nheap, rule_tree)
  self._CheckResult(res.total, '', _HEURISTIC_EXPECTED_RESULTS)
def testNativeHeap(self):
  """Round-trips a NativeHeap through the archive and deep-compares it."""
  archive = self._storage.OpenArchive('nheap', create=True)
  timestamp = archive.StartNewSnapshot()
  nh = native_heap.NativeHeap()
  # Build three allocations, each with a two-frame stack trace.
  for i in xrange(1, 4):
    trace = stacktrace.Stacktrace()
    for rel_addr, exec_file, file_off in ((1, 'foo.so', 1), (2, 'bar.so', 2)):
      frame = nh.GetStackFrame(i * 10 + rel_addr)
      frame.SetExecFileInfo(exec_file, file_off)
      trace.Add(frame)
    nh.Add(native_heap.Allocation(i * 2, i * 3, trace))
  archive.StoreNativeHeap(nh)
  deserialized = archive.LoadNativeHeap(timestamp)
  self._DeepCompare(nh, deserialized)
  self._storage.DeleteArchive('nheap')
def decode(self, json_str):  # pylint: disable=W0221
  """Deserializes a JSON string into a |native_heap.NativeHeap| instance."""
  doc = super(NativeHeapDecoder, self).decode(json_str)
  nh = native_heap.NativeHeap()

  # Pass 1: rebuild the index of unique stack frames.
  for frame_dict in doc['stack_frames']:
    frame = nh.GetStackFrame(frame_dict['address'])
    frame.SetExecFileInfo(frame_dict['exec_file_rel_path'],
                          frame_dict['offset'])

  # Pass 2: rebuild the allocations, reusing the frames indexed above.
  for alloc_dict in doc['allocations']:
    trace = stacktrace.Stacktrace()
    for absolute_addr in alloc_dict['stack_trace']:
      trace.Add(nh.GetStackFrame(absolute_addr))
    nh.Add(native_heap.Allocation(alloc_dict['size'], alloc_dict['count'],
                                  trace))

  return nh
def Parse(content):
  """Parses the output of the heap_dump binary (part of libheap_profiler).

  heap_dump provides a convenient JSON output.
  See the header of tools/android/heap_profiler/heap_dump.c for more details.

  Args:
    content: string containing the command output.

  Returns:
    An instance of |native_heap.NativeHeap|.
  """
  data = json.loads(content)
  assert 'allocs' in data, 'Need to run heap_dump with the -x (extended) arg.'
  heap = native_heap.NativeHeap()

  # First rebuild the backtraces, keyed by their dump index.
  traces_by_index = {}  # index (str) -> |stacktrace.Stacktrace|
  for index, stack_entry in data['stacks'].iteritems():
    trace = stacktrace.Stacktrace()
    for absolute_addr in stack_entry['f']:
      trace.Add(heap.GetStackFrame(absolute_addr))
    traces_by_index[index] = trace

  # Then materialize the allocations, attaching the backtraces built above.
  for start_addr, alloc_entry in data['allocs'].iteritems():
    flags = int(alloc_entry['f'])
    # TODO(primiano): For the moment we just skip completely the allocations
    # made in the Zygote (pre-fork) because this is usually reasonable. In the
    # near future we will expose them with some UI to selectively filter them.
    if flags & FLAGS_IN_ZYGOTE:
      continue
    heap.Add(native_heap.Allocation(
        size=alloc_entry['l'],
        stack_trace=traces_by_index[alloc_entry['s']],
        start=int(start_addr, 16),
        flags=flags))

  return heap
def Parse(lines):
  """Parses the output of Android's am dumpheap -n.

  am dumpheap dumps the outstanding malloc information (when the system
  property libc.debug.malloc == 1). The expected dumpheap output looks like
  this:
  ------------------------------------------------------------------------------
  ... Some irrelevant banner lines ...
  z 0 sz 1000 num 3 bt 1234 5678 9abc ...
  ...
  MAPS
  9dcd0000-9dcd6000 r-xp 00000000 103:00 815  /system/lib/libnbaio.so
  ...
  ------------------------------------------------------------------------------
  The lines before MAPS list the allocations grouped by {size, backtrace}. In
  the example above, "1000" is the size of each alloc, "3" is their cardinality
  and "1234 5678 9abc" are the first N stack frames (absolute addresses in the
  process virtual address space). The lines after MAPS provide essentially the
  same information of /proc/PID/smaps.
  See tests/android_backend_test.py for a more complete example.

  Args:
    lines: array of strings containing the am dumpheap -n output.

  Returns:
    An instance of |native_heap.NativeHeap|.
  """
  # Simple three-state machine: allocations first, then the MAPS section,
  # then (after END) everything else is ignored.
  (STATE_PARSING_BACKTRACES, STATE_PARSING_MAPS, STATE_ENDED) = range(3)
  # Matches e.g. "z 0 sz 1000 num 3 bt 1234 5678": group(1)=size,
  # group(2)=count, group(3)=whitespace-separated hex frame addresses.
  BT_RE = re.compile(
      r'^\w+\s+\d+\s+sz\s+(\d+)\s+num\s+(\d+)\s+bt\s+((?:[0-9a-f]+\s?)+)$')
  # Matches smaps-style lines: group(1)=start, group(2)=end, group(3)=file
  # offset, group(4)=mapped file path (all addresses/offsets in hex).
  MAP_RE = re.compile(
      r'^([0-9a-f]+)-([0-9a-f]+)\s+....\s*([0-9a-f]+)\s+\w+:\w+\s+\d+\s*(.*)$')

  state = STATE_PARSING_BACKTRACES
  skip_first_n_lines = 5  # The dump starts with irrelevant banner lines.
  mmap = memory_map.Map()
  stack_frames = {}  # absolute_address (int) -> |stacktrace.Frame|.
  nativeheap = native_heap.NativeHeap()

  for line in lines:
    line = line.rstrip('\r\n')

    if skip_first_n_lines > 0:
      skip_first_n_lines -= 1
      continue

    if state == STATE_PARSING_BACKTRACES:
      if line == 'MAPS':
        state = STATE_PARSING_MAPS
        continue
      m = BT_RE.match(line)
      if not m:
        logging.warning('Skipping unrecognized dumpheap alloc: "%s"' % line)
        continue
      alloc_size = int(m.group(1))
      alloc_count = int(m.group(2))
      alloc_bt_str = m.group(3)
      strace = stacktrace.Stacktrace()
      # Keep only one |stacktrace.Frame| per distinct |absolute_addr|, in order
      # to ease the complexity of the final de-offset pass.
      for absolute_addr in alloc_bt_str.split():
        absolute_addr = int(absolute_addr, 16)
        stack_frame = stack_frames.get(absolute_addr)
        if not stack_frame:
          stack_frame = stacktrace.Frame(absolute_addr)
          stack_frames[absolute_addr] = stack_frame
        strace.Add(stack_frame)
      nativeheap.Add(native_heap.Allocation(alloc_size, alloc_count, strace))

    # The am dumpheap output contains also a list of mmaps. This information is
    # used in this module for the only purpose of normalizing addresses (i.e.
    # translating an absolute addr into its offset inside the mmap-ed library).
    # The mmap information is not further retained. A more complete mmap dump is
    # performed (and retained) using the memdump tool (see memdump_parser.py).
    elif state == STATE_PARSING_MAPS:
      if line == 'END':
        state = STATE_ENDED
        continue
      m = MAP_RE.match(line)
      if not m:
        logging.warning('Skipping unrecognized dumpheap mmap: "%s"' % line)
        continue
      mmap.Add(memory_map.MapEntry(
          start=int(m.group(1), 16),
          end=int(m.group(2), 16),
          prot_flags='----',  # Not really needed for lookup
          mapped_file=m.group(4),
          mapped_offset=int(m.group(3), 16)))

    elif state == STATE_ENDED:
      # Anything after END is ignored.
      pass

    else:
      assert(False)  # Unreachable: |state| is always one of the three above.

  # Final pass: translate all the stack frames' absolute addresses into
  # relative offsets (exec_file + offset) using the memory maps just processed.
  for abs_addr, stack_frame in stack_frames.iteritems():
    assert(abs_addr == stack_frame.address)
    map_entry = mmap.Lookup(abs_addr)
    if not map_entry:
      # Frame address does not fall inside any known mapping: leave it
      # absolute (un-relativized) rather than guessing.
      continue
    stack_frame.SetExecFileInfo(map_entry.mapped_file,
                                map_entry.GetRelativeOffset(abs_addr))

  return nativeheap
def runTest(self):
  """End-to-end NativeHeap test: covers stack-frame deduplication,
  relativization (abs addr -> mmap + offset), symbolization and
  resident-size calculation.

  Fix: removed the dead assignment |mmap = memory_map| (the module object was
  bound and immediately overwritten by |memory_map.Map()|, never used).
  """
  nheap = native_heap.NativeHeap()

  EXE_1_MM_BASE = 64 * PAGE_SIZE
  EXE_2_MM_BASE = 65 * PAGE_SIZE
  EXE_2_FILE_OFF = 8192
  st1 = stacktrace.Stacktrace()
  st1.Add(nheap.GetStackFrame(EXE_1_MM_BASE))
  st1.Add(nheap.GetStackFrame(EXE_1_MM_BASE + 4))

  st2 = stacktrace.Stacktrace()
  st2.Add(nheap.GetStackFrame(EXE_1_MM_BASE))
  st2.Add(nheap.GetStackFrame(EXE_2_MM_BASE + 4))
  st2.Add(nheap.GetStackFrame(EXE_2_MM_BASE + PAGE_SIZE + 4))

  # Check that GetStackFrames keeps one unique object instance per address.
  # This is to guarantee that the symbolization logic (SymbolizeUsingSymbolDB)
  # can cheaply iterate on distinct stack frames rather than re-processing
  # every stack frame for each allocation (and save memory as well).
  self.assertIs(st1[0], st2[0])
  self.assertIsNot(st1[0], st1[1])
  self.assertIsNot(st2[0], st2[1])

  alloc1 = native_heap.Allocation(start=4, size=4, stack_trace=st1)
  alloc2 = native_heap.Allocation(start=4090, size=8, stack_trace=st1)
  alloc3 = native_heap.Allocation(start=8190, size=10000, stack_trace=st2)
  nheap.Add(alloc1)
  nheap.Add(alloc2)
  nheap.Add(alloc3)

  self.assertEqual(len(nheap.allocations), 3)
  self.assertIn(alloc1, nheap.allocations)
  self.assertIn(alloc2, nheap.allocations)
  self.assertIn(alloc3, nheap.allocations)

  ############################################################################
  # Test the relativization (absolute address -> mmap + offset) logic.
  ############################################################################
  mmap = memory_map.Map()
  mmap.Add(
      memory_map.MapEntry(EXE_1_MM_BASE, EXE_1_MM_BASE + PAGE_SIZE - 1, 'rw--',
                          '/d/exe1', 0))
  mmap.Add(
      memory_map.MapEntry(EXE_2_MM_BASE, EXE_2_MM_BASE + PAGE_SIZE - 1, 'rw--',
                          'exe2', EXE_2_FILE_OFF))
  # Entry for EXE_3 is deliberately missing to check the fallback behavior.

  nheap.RelativizeStackFrames(mmap)

  self.assertEqual(st1[0].exec_file_rel_path, '/d/exe1')
  self.assertEqual(st1[0].exec_file_name, 'exe1')
  self.assertEqual(st1[0].offset, 0)

  self.assertEqual(st1[1].exec_file_rel_path, '/d/exe1')
  self.assertEqual(st1[1].exec_file_name, 'exe1')
  self.assertEqual(st1[1].offset, 4)

  self.assertEqual(st2[0].exec_file_rel_path, '/d/exe1')
  self.assertEqual(st2[0].exec_file_name, 'exe1')
  self.assertEqual(st2[0].offset, 0)

  self.assertEqual(st2[1].exec_file_rel_path, 'exe2')
  self.assertEqual(st2[1].exec_file_name, 'exe2')
  self.assertEqual(st2[1].offset, 4 + EXE_2_FILE_OFF)

  # No mmap was registered for this frame: everything must stay unset.
  self.assertIsNone(st2[2].exec_file_rel_path)
  self.assertIsNone(st2[2].exec_file_name)
  self.assertIsNone(st2[2].offset)

  ############################################################################
  # Test the symbolization logic.
  ############################################################################
  syms = symbol.Symbols()
  syms.Add('/d/exe1', 0, symbol.Symbol('sym1', 'src1.c', 1))  # st1[0]
  syms.Add('exe2', 4 + EXE_2_FILE_OFF, symbol.Symbol('sym3'))  # st2[1]

  nheap.SymbolizeUsingSymbolDB(syms)
  self.assertEqual(st1[0].symbol.name, 'sym1')
  self.assertEqual(st1[0].symbol.source_info[0].source_file_path, 'src1.c')
  self.assertEqual(st1[0].symbol.source_info[0].line_number, 1)

  # st1[1] should have no symbol info, because we didn't provide any above.
  self.assertIsNone(st1[1].symbol)

  # st2[0] and st1[0] were the same Frame. Expect identical symbols instances.
  self.assertIs(st2[0].symbol, st1[0].symbol)

  # st2[1] should have a symbols name, but no source line info.
  self.assertEqual(st2[1].symbol.name, 'sym3')
  self.assertEqual(len(st2[1].symbol.source_info), 0)

  # st2[2] should have no sym because we didn't even provide a mmap for exe3.
  self.assertIsNone(st2[2].symbol)

  ############################################################################
  # Test the resident size calculation logic (intersects mmaps and allocs).
  ############################################################################
  mmap.Add(
      memory_map.MapEntry(0, 8191, 'rw--', '', 0, resident_pages=[1]))
  mmap.Add(
      memory_map.MapEntry(8192, 12287, 'rw--', '', 0, resident_pages=[1]))
  # [12k, 16k] is deliberately missing to check the fallback behavior.
  mmap.Add(
      memory_map.MapEntry(16384, 20479, 'rw--', '', 0, resident_pages=[1]))

  nheap.CalculateResidentSize(mmap)

  # alloc1 [4, 8] is fully resident because it lays in the first resident 4k.
  self.assertEqual(alloc1.resident_size, 4)

  # alloc2 [4090, 4098] should have only 6 resident bytes ([4090,4096]), but
  # not the last two, which lay on the second page which is not resident.
  self.assertEqual(alloc2.resident_size, 6)

  # alloc3 [8190, 18190] is split as follows (* = resident):
  #  [8190, 8192]: these 2 bytes are NOT resident, they lay in the 2nd page.
  # *[8192, 12288]: the 3rd page is resident and is fully covered by alloc3.
  #  [12288, 16384]: the 4th page is fully covered as well, but not resident.
  # *[16384, 18190]: the 5th page is partially covered and resident.
  self.assertEqual(alloc3.resident_size, (12288 - 8192) + (18190 - 16384))