Example #1
def CreateSizeInfo(map_path,
                   elf_path,
                   tool_prefix,
                   output_directory,
                   normalize_names=True):
    """Creates a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
        aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
  """
    source_mapper = None
    if output_directory:
        # Start by finding the elf_object_paths, so that nm can run on them while
        # the linker .map is being parsed.
        logging.info('Parsing ninja files.')
        source_mapper, elf_object_paths = ninja_parser.Parse(
            output_directory, elf_path)
        logging.debug('Parsed %d .ninja files.',
                      source_mapper.parsed_file_count)
        assert not elf_path or elf_object_paths, (
            'Failed to find link command in ninja files for ' +
            os.path.relpath(elf_path, output_directory))

    if elf_path:
        # Run nm on the elf file to retrieve the list of symbol names per-address.
        # This list is required because the .map file contains only a single name
        # for each address, yet multiple symbols are often coalesced when they are
        # identical. This coalescing happens mainly for small symbols and for C++
        # templates. Such symbols make up ~500kb of libchrome.so on Android.
        elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

        # Run nm on all .o/.a files to retrieve the symbol names within them.
        # The list is used to detect when multiple .o files contain the same symbol
        # (e.g. inline functions), and to update the object_path / source_path
        # fields accordingly.
        # Looking in object files is required because the .map file chooses a
        # single path for these symbols.
        # Rather than record all paths for each symbol, set the paths to be the
        # common ancestor of all paths.
        if output_directory:
            bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix,
                                                      output_directory)
            bulk_analyzer.AnalyzePaths(elf_object_paths)

    logging.info('Parsing Linker Map')
    with _OpenMaybeGz(map_path) as map_file:
        section_sizes, raw_symbols = (
            linker_map_parser.MapFileParser().Parse(map_file))

    if elf_path:
        logging.debug('Validating section sizes')
        elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
        for k, v in elf_section_sizes.iteritems():
            if v != section_sizes.get(k):
                logging.error(
                    'ELF file and .map file do not agree on section sizes.')
                logging.error('.map file: %r', section_sizes)
                logging.error('readelf: %r', elf_section_sizes)
                sys.exit(1)

    if elf_path and output_directory:
        missed_object_paths = _DiscoverMissedObjectPaths(
            raw_symbols, elf_object_paths)
        bulk_analyzer.AnalyzePaths(missed_object_paths)
        bulk_analyzer.Close()

    if source_mapper:
        logging.info('Looking up source paths from ninja files')
        _ExtractSourcePaths(raw_symbols, source_mapper)
        assert source_mapper.unmatched_paths_count == 0, (
            'One or more source file paths could not be found. Likely caused by '
            '.ninja files being generated at a different time than the .map file.'
        )

    logging.info('Stripping linker prefixes from symbol names')
    _StripLinkerAddedSymbolPrefixes(raw_symbols)
    # Map file for some reason doesn't unmangle all names.
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

    if elf_path:
        logging.info('Adding aliased symbols, as reported by nm')
        # This normally does not block (it's finished by this time).
        aliases_by_address = elf_nm_result.get()
        _AddSymbolAliases(raw_symbols, aliases_by_address)

        if output_directory:
            # For aliases, this provides path information where there wasn't any.
            logging.info('Computing ancestor paths for inline functions and '
                         'normalizing object paths')

            object_paths_by_name = bulk_analyzer.Get()
            logging.debug(
                'Fetched path information for %d symbols from %d files',
                len(object_paths_by_name),
                len(elf_object_paths) + len(missed_object_paths))
            _ComputeAncestorPathsAndNormalizeObjectPaths(
                raw_symbols, object_paths_by_name, source_mapper)

    if not elf_path or not output_directory:
        logging.info('Normalizing object paths.')
        for symbol in raw_symbols:
            symbol.object_path = _NormalizeObjectPath(symbol.object_path)

    # Padding not really required, but it is useful to check for large padding and
    # log a warning.
    logging.info('Calculating padding')
    _CalculatePadding(raw_symbols)

    # Do not call _NormalizeNames() during archive since that method tends to need
    # tweaks over time. Calling it only when loading .size files allows for more
    # flexibility.
    if normalize_names:
        _NormalizeNames(raw_symbols)

    logging.info('Processed %d symbols', len(raw_symbols))
    size_info = models.SizeInfo(section_sizes, raw_symbols)

    if logging.getLogger().isEnabledFor(logging.INFO):
        for line in describe.DescribeSizeInfoCoverage(size_info):
            logging.info(line)
    logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
    return size_info
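
A minimal usage sketch for Example #1's CreateSizeInfo. The module name (archive), every path, and the tool_prefix value are assumptions for illustration, not taken from the example; normalize_names=False follows the comment above about deferring _NormalizeNames() until .size files are loaded.

import archive  # assumed module containing the CreateSizeInfo above

size_info = archive.CreateSizeInfo(
    map_path='out/Release/libchrome.so.map.gz',          # hypothetical path
    elf_path='out/Release/lib.unstripped/libchrome.so',  # hypothetical path
    tool_prefix='arm-linux-androideabi-',   # hypothetical c++filt/nm prefix
    output_directory='out/Release',         # hypothetical build directory
    normalize_names=False)  # defer _NormalizeNames() to .size loading time
# Assumes SizeInfo keeps the section_sizes passed to its constructor.
print('Known sections: %s' % sorted(size_info.section_sizes))
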
Example #2
def _ParseElfInfo(map_path, elf_path, tool_prefix, output_directory,
    track_string_literals, elf_object_paths):
  """Adds Elf section sizes and symbols."""
  if elf_path:
    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when multiple .o files contain the same symbol
    # (e.g. inline functions), and to update the object_path / source_path
    # fields accordingly.
    # Looking in object files is required because the .map file chooses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if output_directory:
      bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory)
      bulk_analyzer.AnalyzePaths(elf_object_paths)

  logging.info('Parsing Linker Map')
  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if elf_path:
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
    for k, v in elf_section_sizes.iteritems():
      if v != section_sizes.get(k):
        logging.error('ELF file and .map file do not agree on section sizes.')
        logging.error('.map file: %r', section_sizes)
        logging.error('readelf: %r', elf_section_sizes)
        sys.exit(1)

  if elf_path and output_directory:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, elf_object_paths)
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.SortPaths()
    if track_string_literals:
      merge_string_syms = [s for s in raw_symbols if
                           s.full_name == '** merge strings' or
                           s.full_name == '** lld merge strings']
      # It is more likely that there is a bug in supersize than that an ELF
      # has no string literals at all.
      assert merge_string_syms
      string_positions = [(s.address, s.size) for s in merge_string_syms]
      bulk_analyzer.AnalyzeStringLiterals(elf_path, string_positions)

  logging.info('Stripping linker prefixes from symbol names')
  _StripLinkerAddedSymbolPrefixes(raw_symbols)
  # Map file for some reason doesn't demangle all names.
  # Demangle prints its own log statement.
  demangle.DemangleRemainingSymbols(raw_symbols, tool_prefix)

  if elf_path:
    logging.info(
        'Adding symbols removed by identical code folding (as reported by nm)')
    # This normally does not block (it's finished by this time).
    names_by_address = elf_nm_result.get()
    raw_symbols = _AddNmAliases(raw_symbols, names_by_address)

    if output_directory:
      object_paths_by_name = bulk_analyzer.GetSymbolNames()
      logging.debug('Fetched path information for %d symbols from %d files',
                    len(object_paths_by_name),
                    len(elf_object_paths) + len(missed_object_paths))

      # For aliases, this provides path information where there wasn't any.
      logging.info('Creating aliases for symbols shared by multiple paths')
      raw_symbols = _AssignNmAliasPathsAndCreatePathAliases(
          raw_symbols, object_paths_by_name)

      if track_string_literals:
        logging.info('Waiting for string literal extraction to complete.')
        list_of_positions_by_object_path = bulk_analyzer.GetStringPositions()
      bulk_analyzer.Close()

      if track_string_literals:
        logging.info('Deconstructing ** merge strings into literals')
        replacements = _CreateMergeStringsReplacements(merge_string_syms,
            list_of_positions_by_object_path)
        for merge_sym, literal_syms in itertools.izip(
            merge_string_syms, replacements):
          # Don't replace if no literals were found.
          if literal_syms:
            # Re-find the symbols since aliases cause their indices to change.
            idx = raw_symbols.index(merge_sym)
            # This assignment is a bit slow (causes array to be shifted), but
            # is fast enough since len(merge_string_syms) < 10.
            raw_symbols[idx:idx + 1] = literal_syms

  return section_sizes, raw_symbols
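
Example #2 factors the ELF- and .map-specific work out of CreateSizeInfo into a private helper that returns (section_sizes, raw_symbols). A rough wiring sketch, mirroring the flow of Examples #1 and #3; the bare argument names stand in for real values defined elsewhere:

# Sketch only: map_path, elf_path, tool_prefix and output_directory are
# placeholders; ninja_parser and models are used as in the other examples.
source_mapper, elf_object_paths = ninja_parser.Parse(output_directory, elf_path)
section_sizes, raw_symbols = _ParseElfInfo(
    map_path, elf_path, tool_prefix, output_directory,
    track_string_literals=True, elf_object_paths=elf_object_paths)
size_info = models.SizeInfo(section_sizes, raw_symbols)
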
Example #3
def CreateSizeInfo(map_path,
                   elf_path,
                   tool_prefix,
                   output_directory,
                   normalize_names=True,
                   track_string_literals=True):
    """Creates a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
        aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
    normalize_names: Whether to normalize symbol names.
    track_string_literals: Whether to break down "** merge string" sections into
        smaller symbols (requires output_directory).
  """
    source_mapper = None
    if output_directory:
        # Start by finding the elf_object_paths, so that nm can run on them while
        # the linker .map is being parsed.
        logging.info('Parsing ninja files.')
        source_mapper, elf_object_paths = ninja_parser.Parse(
            output_directory, elf_path)
        logging.debug('Parsed %d .ninja files.',
                      source_mapper.parsed_file_count)
        assert not elf_path or elf_object_paths, (
            'Failed to find link command in ninja files for ' +
            os.path.relpath(elf_path, output_directory))

    if elf_path:
        # Run nm on the elf file to retrieve the list of symbol names per-address.
        # This list is required because the .map file contains only a single name
        # for each address, yet multiple symbols are often coalesced when they are
        # identical. This coalescing happens mainly for small symbols and for C++
        # templates. Such symbols make up ~500kb of libchrome.so on Android.
        elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

        # Run nm on all .o/.a files to retrieve the symbol names within them.
        # The list is used to detect when multiple .o files contain the same symbol
        # (e.g. inline functions), and to update the object_path / source_path
        # fields accordingly.
        # Looking in object files is required because the .map file chooses a
        # single path for these symbols.
        # Rather than record all paths for each symbol, set the paths to be the
        # common ancestor of all paths.
        if output_directory:
            bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix,
                                                      output_directory)
            bulk_analyzer.AnalyzePaths(elf_object_paths)

    logging.info('Parsing Linker Map')
    with _OpenMaybeGz(map_path) as map_file:
        section_sizes, raw_symbols = (
            linker_map_parser.MapFileParser().Parse(map_file))

    if elf_path:
        logging.debug('Validating section sizes')
        elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
        for k, v in elf_section_sizes.iteritems():
            if v != section_sizes.get(k):
                logging.error(
                    'ELF file and .map file do not agree on section sizes.')
                logging.error('.map file: %r', section_sizes)
                logging.error('readelf: %r', elf_section_sizes)
                sys.exit(1)

    if elf_path and output_directory:
        missed_object_paths = _DiscoverMissedObjectPaths(
            raw_symbols, elf_object_paths)
        bulk_analyzer.AnalyzePaths(missed_object_paths)
        bulk_analyzer.SortPaths()
        if track_string_literals:
            merge_string_syms = [
                s for s in raw_symbols if s.full_name == '** merge strings'
                or s.full_name == '** lld merge strings'
            ]
            # It is more likely that there is a bug in supersize than that an
            # ELF has no string literals at all.
            assert merge_string_syms
            string_positions = [(s.address, s.size) for s in merge_string_syms]
            bulk_analyzer.AnalyzeStringLiterals(elf_path, string_positions)

    logging.info('Stripping linker prefixes from symbol names')
    _StripLinkerAddedSymbolPrefixes(raw_symbols)
    # Map file for some reason doesn't unmangle all names.
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

    if elf_path:
        logging.info(
            'Adding symbols removed by identical code folding (as reported by nm)'
        )
        # This normally does not block (it's finished by this time).
        names_by_address = elf_nm_result.get()
        _AddNmAliases(raw_symbols, names_by_address)

        if output_directory:
            object_paths_by_name = bulk_analyzer.GetSymbolNames()
            logging.debug(
                'Fetched path information for %d symbols from %d files',
                len(object_paths_by_name),
                len(elf_object_paths) + len(missed_object_paths))

            # For aliases, this provides path information where there wasn't any.
            logging.info(
                'Creating aliases for symbols shared by multiple paths')
            raw_symbols = _AssignNmAliasPathsAndCreatePathAliases(
                raw_symbols, object_paths_by_name)

            if track_string_literals:
                logging.info(
                    'Waiting for string literal extraction to complete.')
                list_of_positions_by_object_path = (
                    bulk_analyzer.GetStringPositions())
            bulk_analyzer.Close()

            if track_string_literals:
                logging.info('Deconstructing ** merge strings into literals')
                replacements = _CreateMergeStringsReplacements(
                    merge_string_syms, list_of_positions_by_object_path)
                for merge_sym, literal_syms in itertools.izip(
                        merge_string_syms, replacements):
                    # Don't replace if no literals were found.
                    if literal_syms:
                        # Re-find the symbols since aliases cause their indices to change.
                        idx = raw_symbols.index(merge_sym)
                        # This assignment is a bit slow (causes array to be shifted), but
                        # is fast enough since len(merge_string_syms) < 10.
                        raw_symbols[idx:idx + 1] = literal_syms

    _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper)
    logging.info('Converting excessive aliases into shared-path symbols')
    _CompactLargeAliasesIntoSharedSymbols(raw_symbols)
    logging.debug('Connecting nm aliases')
    _ConnectNmAliases(raw_symbols)

    # Padding not really required, but it is useful to check for large padding and
    # log a warning.
    logging.info('Calculating padding')
    _CalculatePadding(raw_symbols)

    # Do not call _NormalizeNames() during archive since that method tends to need
    # tweaks over time. Calling it only when loading .size files allows for more
    # flexibility.
    if normalize_names:
        _NormalizeNames(raw_symbols)

    logging.info('Processed %d symbols', len(raw_symbols))
    size_info = models.SizeInfo(section_sizes, raw_symbols)

    if logging.getLogger().isEnabledFor(logging.INFO):
        for line in describe.DescribeSizeInfoCoverage(size_info):
            logging.info(line)
    logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
    return size_info
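
For contrast, a hedged sketch of a map-only call to Example #3's CreateSizeInfo. Per the docstring, elf_path may be None, and a None output_directory skips source paths and symbol alias information; the path and the empty tool_prefix (plain nm/c++filt on PATH) are assumptions.

size_info = CreateSizeInfo(
    map_path='libmonochrome.so.map.gz',  # hypothetical .map(.gz) path
    elf_path=None,                # no ELF: nm alias collection is skipped
    tool_prefix='',               # assumes nm/c++filt are on PATH
    output_directory=None,        # no source paths or path aliases recorded
    normalize_names=True,
    track_string_literals=False)  # would require output_directory anyway
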