Esempio n. 1
0
def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and returns a dict of address->[names]"""
  # Constructors often show up twice, so use sets to ensure no duplicates.
  names_by_address = collections.defaultdict(set)

  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  args = [path_util.GetNmPath(tool_prefix), '--no-sort', '--defined-only',
          elf_path]
  output = subprocess.check_output(args)
  for line in output.splitlines():
    space_idx = line.find(' ')
    address_str = line[:space_idx]
    section = line[space_idx + 1]
    mangled_name = line[space_idx + 3:]

    # To verify that rodata does not have aliases:
    #   nm --no-sort --defined-only libchrome.so > nm.out
    #   grep -v '\$' nm.out | grep ' r ' | sort | cut -d' ' -f1 > addrs
    #   wc -l < addrs; uniq < addrs | wc -l
    if section not in 'tTW' or not _IsRelevantNmName(mangled_name):
      continue

    address = int(address_str, 16)
    if not address:
      continue
    names_by_address[address].add(mangled_name)

  # Demangle all names.
  names_by_address = demangle.DemangleSetsInDicts(names_by_address, tool_prefix)

  # Since this is run in a separate process, minimize data passing by returning
  # only aliased symbols.
  # Also: Sort to ensure stable ordering.
  return {k: sorted(v) for k, v in names_by_address.iteritems() if len(v) > 1}
Esempio n. 2
0
def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and returns a dict of address->[names]"""
  # Constructors often show up twice, so use sets to ensure no duplicates.
  names_by_address = collections.defaultdict(set)

  # Many OUTLINED_FUNCTION_* entries can coexist on a single address, possibly
  # mixed with regular symbols. However, naively keeping these is bad because:
  # * OUTLINED_FUNCTION_* can have many duplicates. Keeping them would cause
  #   false associations downstream, when looking up object_paths from names.
  # * For addresses with multiple OUTLINED_FUNCTION_* entries, we can't get the
  #   associated object_path (exception: the one entry in the .map file, for LLD
  #   without ThinLTO). So keeping copies around is rather useless.
  # Our solution is to merge OUTLINED_FUNCTION_* entries at the same address
  # into a single symbol. We'd also like to keep track of the number of copies
  # (although it will not be used to compute PSS computation). This is done by
  # writing the count in the name, e.g., '** outlined function * 5'.
  num_outlined_functions_at_address = collections.Counter()

  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  args = [path_util.GetNmPath(tool_prefix), '--no-sort', '--defined-only',
          elf_path]
  # pylint: disable=unexpected-keyword-arg
  proc = subprocess.Popen(
      args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8')
  # llvm-nm may write to stderr. Discard to denoise.
  stdout, _ = proc.communicate()
  assert proc.returncode == 0
  for line in stdout.splitlines():
    space_idx = line.find(' ')
    address_str = line[:space_idx]
    section = line[space_idx + 1]
    mangled_name = line[space_idx + 3:]

    # To verify that rodata does not have aliases:
    #   nm --no-sort --defined-only libchrome.so > nm.out
    #   grep -v '\$' nm.out | grep ' r ' | sort | cut -d' ' -f1 > addrs
    #   wc -l < addrs; uniq < addrs | wc -l
    if section not in 'tTW' or not _IsRelevantNmName(mangled_name):
      continue

    address = int(address_str, 16)
    if not address:
      continue
    if mangled_name.startswith('OUTLINED_FUNCTION_'):
      num_outlined_functions_at_address[address] += 1
    else:
      names_by_address[address].add(mangled_name)

  # Need to add before demangling because |names_by_address| changes type.
  for address, count in num_outlined_functions_at_address.items():
    name = '** outlined function' + (' * %d' % count if count > 1 else '')
    names_by_address[address].add(name)

  # Demangle all names.
  names_by_address = demangle.DemangleSetsInDicts(names_by_address, tool_prefix)

  # Since this is run in a separate process, minimize data passing by returning
  # only aliased symbols.
  # Also: Sort to ensure stable ordering.
  return {
      addr: sorted(names, key=lambda n: (n.startswith('**'), n))
      for addr, names in names_by_address.items()
      if len(names) > 1 or num_outlined_functions_at_address.get(addr, 0) > 1
  }