Example #1
0
    def runTest(self):
        """Classifies a mocked native heap and verifies every result node.

        One allocation is built per entry of _TEST_STACK_TRACES: entry[0] is
        used as the allocation size and every item of entry[1] is turned into
        a symbolized stack frame. The heap is then classified with the rules
        loaded from _TEST_RULES and the resulting tree is compared, node by
        node, against _EXPECTED_RESULTS.
        """
        rules = native_heap_classifier.LoadRules(_TEST_RULES)
        heap = native_heap.NativeHeap()
        next_addr = 0
        for entry in _TEST_STACK_TRACES:
            backtrace = stacktrace.Stacktrace()
            for symbol_name in entry[1]:
                # The address value is meaningless here, it only has to be
                # different for every frame.
                next_addr += 4
                frame = stacktrace.Frame(next_addr)
                frame.SetSymbolInfo(symbol.Symbol(symbol_name))
                backtrace.Add(frame)
            allocation = native_heap.Allocation(size=entry[0],
                                                count=1,
                                                stack_trace=backtrace)
            heap.Add(allocation)

        results = native_heap_classifier.Classify(heap, rules)

        def _VerifyNode(node, prefix):
            # Expected values are keyed by the '::'-joined path of the node.
            full_name = prefix + node.name
            self.assertIn(full_name, _EXPECTED_RESULTS)
            self.assertEqual(node.values, _EXPECTED_RESULTS[full_name])
            child_prefix = full_name + '::'
            for child in node.children:
                _VerifyNode(child, child_prefix)

        _VerifyNode(results.total, '')
Example #2
0
  def testStandardRuleParsingAndProcessing(self):
    """Checks classification of a mocked heap using the rules in _TEST_RULES.

    Each entry of _TEST_STACK_TRACES yields one allocation: entry[0] is its
    size and entry[1] is a sequence of (symbol name, source path) pairs, one
    per stack frame. The classified tree is validated through _CheckResult
    against _EXPECTED_RESULTS.
    """
    rules = native_heap_classifier.LoadRules(_TEST_RULES)
    heap = native_heap.NativeHeap()
    next_addr = 0
    for entry in _TEST_STACK_TRACES:
      backtrace = stacktrace.Stacktrace()
      for (symbol_name, source_path) in entry[1]:
        # The address value is meaningless here, it only has to be unique.
        next_addr += 4
        frame = stacktrace.Frame(next_addr)
        frame_symbol = symbol.Symbol(symbol_name, source_path)
        frame.SetSymbolInfo(frame_symbol)
        backtrace.Add(frame)
      allocation = native_heap.Allocation(
          size=entry[0], count=1, stack_trace=backtrace)
      heap.Add(allocation)

    classified = native_heap_classifier.Classify(heap, rules)
    self._CheckResult(classified.total, '', _EXPECTED_RESULTS)
Example #3
0
  def testInferHeuristicRules(self):
    """Checks the rules inferred heuristically from a mocked heap.

    Each (size, source path) pair of _HEURISTIC_TEST_STACK_TRACES becomes one
    allocation whose backtrace is the same frame repeated 10 times. Rules are
    inferred from that heap with a 0.05 threshold, the heap is classified with
    them and the outcome is validated through _CheckResult against
    _HEURISTIC_EXPECTED_RESULTS.
    """
    nheap = native_heap.NativeHeap()
    mock_addr = 0
    for (mock_alloc_size, mock_source_path) in _HEURISTIC_TEST_STACK_TRACES:
      mock_strace = stacktrace.Stacktrace()
      mock_addr += 4  # Addr is irrelevant, just keep it distinct.
      mock_frame = stacktrace.Frame(mock_addr)
      mock_frame.SetSymbolInfo(symbol.Symbol(str(mock_addr), mock_source_path))
      # range() instead of the Python 2-only xrange(): identical iteration
      # here, and it also runs on Python 3.
      for _ in range(10):  # Just repeat the same stack frame 10 times
        mock_strace.Add(mock_frame)
      nheap.Add(native_heap.Allocation(
          size=mock_alloc_size, count=1, stack_trace=mock_strace))

    rule_tree = native_heap_classifier.InferHeuristicRulesFromHeap(
        nheap, threshold=0.05)
    res = native_heap_classifier.Classify(nheap, rule_tree)
    self._CheckResult(res.total, '', _HEURISTIC_EXPECTED_RESULTS)
Example #4
0
 def testNativeHeap(self):
   """Round-trips a NativeHeap through an archive and compares the result.

   Builds a heap with three allocations (two frames each), stores it in the
   'nheap' archive, loads it back using the snapshot timestamp and checks via
   _DeepCompare that the loaded heap matches the original.
   """
   archive = self._storage.OpenArchive('nheap', create=True)
   # The archive is deleted in a finally clause so that a failure while
   # storing, loading or comparing does not leave it behind in the storage.
   try:
     timestamp = archive.StartNewSnapshot()
     nh = native_heap.NativeHeap()
     # range() instead of the Python 2-only xrange(): same values (1, 2, 3).
     for i in range(1, 4):
       stack_trace = stacktrace.Stacktrace()
       frame = nh.GetStackFrame(i * 10 + 1)
       frame.SetExecFileInfo('foo.so', 1)
       stack_trace.Add(frame)
       frame = nh.GetStackFrame(i * 10 + 2)
       frame.SetExecFileInfo('bar.so', 2)
       stack_trace.Add(frame)
       nh.Add(native_heap.Allocation(i * 2, i * 3, stack_trace))
     archive.StoreNativeHeap(nh)
     nh_deser = archive.LoadNativeHeap(timestamp)
     self._DeepCompare(nh, nh_deser)
   finally:
     self._storage.DeleteArchive('nheap')
Example #5
0
 def decode(self, json_str):  # pylint: disable=W0221
     """Decodes a JSON string into a |native_heap.NativeHeap|.

     The decoded dictionary is read through two keys: 'stack_frames', whose
     items carry 'address', 'exec_file_rel_path' and 'offset', and
     'allocations', whose items carry 'size', 'count' and 'stack_trace' (a
     sequence of frame addresses).

     Args:
         json_str: the JSON document to decode.

     Returns:
         The rebuilt |native_heap.NativeHeap| instance.
     """
     decoded = super(NativeHeapDecoder, self).decode(json_str)
     heap = native_heap.NativeHeap()

     # Pass 1: register every stack frame in the heap and attach its
     # executable file information.
     for frame_entry in decoded['stack_frames']:
         heap.GetStackFrame(frame_entry['address']).SetExecFileInfo(
             frame_entry['exec_file_rel_path'], frame_entry['offset'])

     # Pass 2: rebuild the allocations, looking their frames up by address
     # through the same heap.
     for alloc_entry in decoded['allocations']:
         backtrace = stacktrace.Stacktrace()
         for address in alloc_entry['stack_trace']:
             backtrace.Add(heap.GetStackFrame(address))
         heap.Add(
             native_heap.Allocation(alloc_entry['size'],
                                    alloc_entry['count'],
                                    backtrace))
     return heap
def Parse(content):
    """Parses the output of the heap_dump binary (part of libheap_profiler).

    heap_dump provides a convenient JSON output.
    See the header of tools/android/heap_profiler/heap_dump.c for more details.

    The decoded JSON is read through two top-level keys: 'stacks', mapping a
    stack index to an entry whose 'f' field lists the frame addresses, and
    'allocs', mapping a hexadecimal start address to an entry with 'f'
    (flags), 'l' (passed as the allocation size) and 's' (stack index).

    Args:
        content: string containing the command output.

    Returns:
        An instance of |native_heap.NativeHeap|.
    """
    data = json.loads(content)
    assert ('allocs'
            in data), 'Need to run heap_dump with the -x (extended) arg.'
    nativeheap = native_heap.NativeHeap()
    strace_by_index = {}  # index (str) -> |stacktrace.Stacktrace|

    # items() rather than the Python 2-only iteritems(): same pairs in the
    # same order, and it also works on Python 3.
    for index, entry in data['stacks'].items():
        strace = stacktrace.Stacktrace()
        for absolute_addr in entry['f']:
            strace.Add(nativeheap.GetStackFrame(absolute_addr))
        strace_by_index[index] = strace

    for start_addr, entry in data['allocs'].items():
        flags = int(entry['f'])
        # TODO(primiano): For the moment we just skip completely the allocations
        # made in the Zygote (pre-fork) because this is usually reasonable. In the
        # near future we will expose them with some UI to selectively filter them.
        if flags & FLAGS_IN_ZYGOTE:
            continue
        nativeheap.Add(
            native_heap.Allocation(size=entry['l'],
                                   stack_trace=strace_by_index[entry['s']],
                                   start=int(start_addr, 16),
                                   flags=flags))

    return nativeheap
def Parse(lines):
  """Parses the output of Android's am dumpheap -n.

  am dumpheap dumps the outstanding malloc information (when the system property
  libc.debug.malloc == 1).

  The expected dumpheap output looks like this:
  ------------------------------------------------------------------------------
  ... Some irrelevant banner lines ...
  z 0  sz    1000   num    3  bt 1234 5678 9abc ...
  ...
  MAPS
  9dcd0000-9dcd6000 r-xp 00000000 103:00 815       /system/lib/libnbaio.so
  ...
  ------------------------------------------------------------------------------
  The lines before MAPS list the allocations grouped by {size, backtrace}. In
  the example above, "1000" is the size of each alloc, "3" is their cardinality
  and "1234 5678 9abc" are the first N stack frames (absolute addresses in the
  process virtual address space).  The lines after MAPS provide essentially the
  same information of /proc/PID/smaps.
  See tests/android_backend_test.py for a more complete example.

  The first 5 lines are always skipped, and everything after a line equal to
  'END' in the MAPS section is ignored. Lines that match neither expected
  format are skipped with a logged warning.

  Args:
      lines: array of strings containing the am dumpheap -n output.

  Returns:
      An instance of |native_heap.NativeHeap|.
  """
  (STATE_PARSING_BACKTRACES, STATE_PARSING_MAPS, STATE_ENDED) = range(3)
  BT_RE = re.compile(
      r'^\w+\s+\d+\s+sz\s+(\d+)\s+num\s+(\d+)\s+bt\s+((?:[0-9a-f]+\s?)+)$')
  MAP_RE = re.compile(
      r'^([0-9a-f]+)-([0-9a-f]+)\s+....\s*([0-9a-f]+)\s+\w+:\w+\s+\d+\s*(.*)$')

  state = STATE_PARSING_BACKTRACES
  skip_first_n_lines = 5
  mmap = memory_map.Map()
  stack_frames = {}  # absolute_address (int) -> |stacktrace.Frame|.
  nativeheap = native_heap.NativeHeap()

  for line in lines:
    line = line.rstrip('\r\n')
    if skip_first_n_lines > 0:
      skip_first_n_lines -= 1
      continue

    if state == STATE_PARSING_BACKTRACES:
      if line == 'MAPS':
        state = STATE_PARSING_MAPS
        continue
      m = BT_RE.match(line)
      if not m:
        # The line is passed as a logging argument so that it is interpolated
        # only if the record is actually emitted.
        logging.warning('Skipping unrecognized dumpheap alloc: "%s"', line)
        continue
      alloc_size = int(m.group(1))
      alloc_count = int(m.group(2))
      alloc_bt_str = m.group(3)
      strace = stacktrace.Stacktrace()
      # Keep only one |stacktrace.Frame| per distinct |absolute_addr|, in order
      # to ease the complexity of the final de-offset pass.
      for absolute_addr in alloc_bt_str.split():
        absolute_addr = int(absolute_addr, 16)
        stack_frame = stack_frames.get(absolute_addr)
        # Explicit None check: a cache miss must not depend on the truthiness
        # of |stacktrace.Frame| instances.
        if stack_frame is None:
          stack_frame = stacktrace.Frame(absolute_addr)
          stack_frames[absolute_addr] = stack_frame
        strace.Add(stack_frame)
      nativeheap.Add(native_heap.Allocation(alloc_size, alloc_count, strace))

    # The am dumpheap output contains also a list of mmaps. This information is
    # used in this module for the only purpose of normalizing addresses (i.e.
    # translating an absolute addr into its offset inside the mmap-ed library).
    # The mmap information is not further retained. A more complete mmap dump is
    # performed (and retained) using the memdump tool (see memdump_parser.py).
    elif state == STATE_PARSING_MAPS:
      if line == 'END':
        state = STATE_ENDED
        continue
      m = MAP_RE.match(line)
      if not m:
        logging.warning('Skipping unrecognized dumpheap mmap: "%s"', line)
        continue
      mmap.Add(memory_map.MapEntry(
          start=int(m.group(1), 16),
          end=int(m.group(2), 16),
          prot_flags='----', # Not really needed for lookup
          mapped_file=m.group(4),
          mapped_offset=int(m.group(3), 16)))

    elif state == STATE_ENDED:
      # Anything following the END marker is ignored.
      pass

    else:
      # |state| only ever takes one of the three values handled above.
      assert(False)

  # Final pass: translate all the stack frames' absolute addresses into
  # relative offsets (exec_file + offset) using the memory maps just processed.
  # items() rather than the Python 2-only iteritems(): same pairs, and it also
  # works on Python 3.
  for abs_addr, stack_frame in stack_frames.items():
    assert(abs_addr == stack_frame.address)
    map_entry = mmap.Lookup(abs_addr)
    if not map_entry:
      # No mapping covers this address: the frame is left without exec file
      # information.
      continue
    stack_frame.SetExecFileInfo(map_entry.mapped_file,
                                map_entry.GetRelativeOffset(abs_addr))

  return nativeheap
Example #8
0
    def runTest(self):
        """Exercises NativeHeap end to end on a small hand-built heap.

        Covers, in order: uniqueness of the frames returned by GetStackFrame,
        registration of allocations, relativization of the frame addresses
        against a memory map, symbolization through a symbol DB and the
        computation of the resident size of each allocation.
        """
        nheap = native_heap.NativeHeap()

        EXE_1_MM_BASE = 64 * PAGE_SIZE
        EXE_2_MM_BASE = 65 * PAGE_SIZE
        EXE_2_FILE_OFF = 8192
        st1 = stacktrace.Stacktrace()
        st1.Add(nheap.GetStackFrame(EXE_1_MM_BASE))
        st1.Add(nheap.GetStackFrame(EXE_1_MM_BASE + 4))

        st2 = stacktrace.Stacktrace()
        st2.Add(nheap.GetStackFrame(EXE_1_MM_BASE))
        st2.Add(nheap.GetStackFrame(EXE_2_MM_BASE + 4))
        st2.Add(nheap.GetStackFrame(EXE_2_MM_BASE + PAGE_SIZE + 4))

        # Check that GetStackFrame keeps one unique object instance per address.
        # This is to guarantee that the symbolization logic (SymbolizeUsingSymbolDB)
        # can cheaply iterate on distinct stack frames rather than re-processing
        # every stack frame for each allocation (and save memory as well).
        self.assertIs(st1[0], st2[0])
        self.assertIsNot(st1[0], st1[1])
        self.assertIsNot(st2[0], st2[1])

        alloc1 = native_heap.Allocation(start=4, size=4, stack_trace=st1)
        alloc2 = native_heap.Allocation(start=4090, size=8, stack_trace=st1)
        alloc3 = native_heap.Allocation(start=8190,
                                        size=10000,
                                        stack_trace=st2)
        nheap.Add(alloc1)
        nheap.Add(alloc2)
        nheap.Add(alloc3)

        self.assertEqual(len(nheap.allocations), 3)
        self.assertIn(alloc1, nheap.allocations)
        self.assertIn(alloc2, nheap.allocations)
        self.assertIn(alloc3, nheap.allocations)

        ############################################################################
        # Test the relativization (absolute address -> mmap + offset) logic.
        ############################################################################
        mmap = memory_map.Map()
        mmap.Add(
            memory_map.MapEntry(EXE_1_MM_BASE, EXE_1_MM_BASE + PAGE_SIZE - 1,
                                'rw--', '/d/exe1', 0))
        mmap.Add(
            memory_map.MapEntry(EXE_2_MM_BASE, EXE_2_MM_BASE + PAGE_SIZE - 1,
                                'rw--', 'exe2', EXE_2_FILE_OFF))
        # Entry for EXE_3 is deliberately missing to check the fallback behavior.

        nheap.RelativizeStackFrames(mmap)

        self.assertEqual(st1[0].exec_file_rel_path, '/d/exe1')
        self.assertEqual(st1[0].exec_file_name, 'exe1')
        self.assertEqual(st1[0].offset, 0)

        self.assertEqual(st1[1].exec_file_rel_path, '/d/exe1')
        self.assertEqual(st1[1].exec_file_name, 'exe1')
        self.assertEqual(st1[1].offset, 4)

        self.assertEqual(st2[0].exec_file_rel_path, '/d/exe1')
        self.assertEqual(st2[0].exec_file_name, 'exe1')
        self.assertEqual(st2[0].offset, 0)

        self.assertEqual(st2[1].exec_file_rel_path, 'exe2')
        self.assertEqual(st2[1].exec_file_name, 'exe2')
        self.assertEqual(st2[1].offset, 4 + EXE_2_FILE_OFF)

        self.assertIsNone(st2[2].exec_file_rel_path)
        self.assertIsNone(st2[2].exec_file_name)
        self.assertIsNone(st2[2].offset)

        ############################################################################
        # Test the symbolization logic.
        ############################################################################
        syms = symbol.Symbols()
        syms.Add('/d/exe1', 0, symbol.Symbol('sym1', 'src1.c', 1))  # st1[0]
        syms.Add('exe2', 4 + EXE_2_FILE_OFF, symbol.Symbol('sym3'))  # st2[1]

        nheap.SymbolizeUsingSymbolDB(syms)
        self.assertEqual(st1[0].symbol.name, 'sym1')
        self.assertEqual(st1[0].symbol.source_info[0].source_file_path,
                         'src1.c')
        self.assertEqual(st1[0].symbol.source_info[0].line_number, 1)

        # st1[1] should have no symbol info, because we didn't provide any above.
        self.assertIsNone(st1[1].symbol)

        # st2[0] and st1[0] were the same Frame. Expect identical symbols instances.
        self.assertIs(st2[0].symbol, st1[0].symbol)

        # st2[1] should have a symbols name, but no source line info.
        self.assertEqual(st2[1].symbol.name, 'sym3')
        self.assertEqual(len(st2[1].symbol.source_info), 0)

        # st2[2] should have no sym because we didn't even provide a mmap for exe3.
        self.assertIsNone(st2[2].symbol)

        ############################################################################
        # Test the resident size calculation logic (intersects mmaps and allocs).
        ############################################################################
        mmap.Add(
            memory_map.MapEntry(0, 8191, 'rw--', '', 0, resident_pages=[1]))
        mmap.Add(
            memory_map.MapEntry(8192, 12287, 'rw--', '', 0,
                                resident_pages=[1]))
        # [12k, 16k] is deliberately missing to check the fallback behavior.
        mmap.Add(
            memory_map.MapEntry(16384,
                                20479,
                                'rw--',
                                '',
                                0,
                                resident_pages=[1]))
        nheap.CalculateResidentSize(mmap)

        # alloc1 [4, 8] is fully resident because it lays in the first resident 4k.
        self.assertEqual(alloc1.resident_size, 4)

        # alloc2 [4090, 4098] should have only 6 resident bytes ([4090,4096]), but
        # not the last two, which lay on the second page which is not resident.
        self.assertEqual(alloc2.resident_size, 6)

        # alloc3 [8190, 18190] is split as follows (* = resident):
        #  [8190, 8192]: these 2 bytes are NOT resident, they lay in the 2nd page.
        # *[8192, 12288]: the 3rd page is resident and is fully covered by alloc3.
        #  [12288, 16384]: the 4th page is fully covered as well, but not resident.
        # *[16384, 18190]: the 5th page is partially covered and resident.
        self.assertEqual(alloc3.resident_size,
                         (12288 - 8192) + (18190 - 16384))