def CreateSizeInfo(map_path, elf_path, tool_prefix, output_directory,
                   normalize_names=True):
  """Creates a SizeInfo.

  NOTE(review): a second ``CreateSizeInfo`` is defined later in this file and
  shadows this one at import time — confirm which revision is intended to live.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
        aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
    normalize_names: Whether to run _NormalizeNames() on the parsed symbols.

  Returns:
    A models.SizeInfo built from the parsed .map / ELF data.
  """
  source_mapper = None
  if output_directory:
    # Start by finding the elf_object_paths, so that nm can run on them while
    # the linker .map is being parsed.
    logging.info('Parsing ninja files.')
    source_mapper, elf_object_paths = ninja_parser.Parse(
        output_directory, elf_path)
    logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
    assert not elf_path or elf_object_paths, (
        'Failed to find link command in ninja files for ' +
        os.path.relpath(elf_path, output_directory))

  if elf_path:
    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when mutiple .o files contain the same symbol
    # (e.g. inline functions), and to update the object_path / source_path
    # fields accordingly.
    # Looking in object files is required because the .map file choses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if output_directory:
      bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory)
      bulk_analyzer.AnalyzePaths(elf_object_paths)

  logging.info('Parsing Linker Map')
  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if elf_path:
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
    # Py2/Py3-compatible iteration (was the Python-2-only .iteritems()).
    for k, v in elf_section_sizes.items():
      if v != section_sizes.get(k):
        logging.error(
            'ELF file and .map file do not agree on section sizes.')
        logging.error('.map file: %r', section_sizes)
        logging.error('readelf: %r', elf_section_sizes)
        sys.exit(1)

  if elf_path and output_directory:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, elf_object_paths)
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.Close()

  if source_mapper:
    logging.info('Looking up source paths from ninja files')
    _ExtractSourcePaths(raw_symbols, source_mapper)
    assert source_mapper.unmatched_paths_count == 0, (
        'One or more source file paths could not be found. Likely caused by '
        '.ninja files being generated at a different time than the .map file.')

  logging.info('Stripping linker prefixes from symbol names')
  _StripLinkerAddedSymbolPrefixes(raw_symbols)
  # Map file for some reason doesn't unmangle all names.
  # Unmangle prints its own log statement.
  _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

  if elf_path:
    logging.info('Adding aliased symbols, as reported by nm')
    # This normally does not block (it's finished by this time).
    aliases_by_address = elf_nm_result.get()
    _AddSymbolAliases(raw_symbols, aliases_by_address)

    if output_directory:
      # For aliases, this provides path information where there wasn't any.
      logging.info('Computing ancestor paths for inline functions and '
                   'normalizing object paths')
      object_paths_by_name = bulk_analyzer.Get()
      logging.debug(
          'Fetched path information for %d symbols from %d files',
          len(object_paths_by_name),
          len(elf_object_paths) + len(missed_object_paths))
      _ComputeAncestorPathsAndNormalizeObjectPaths(
          raw_symbols, object_paths_by_name, source_mapper)

  if not elf_path or not output_directory:
    logging.info('Normalizing object paths.')
    for symbol in raw_symbols:
      symbol.object_path = _NormalizeObjectPath(symbol.object_path)

  # Padding not really required, but it is useful to check for large padding and
  # log a warning.
  logging.info('Calculating padding')
  _CalculatePadding(raw_symbols)

  # Do not call _NormalizeNames() during archive since that method tends to need
  # tweaks over time. Calling it only when loading .size files allows for more
  # flexibility.
  if normalize_names:
    _NormalizeNames(raw_symbols)

  logging.info('Processed %d symbols', len(raw_symbols))
  size_info = models.SizeInfo(section_sizes, raw_symbols)
  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  return size_info
def _ParseElfInfo(map_path, elf_path, tool_prefix, output_directory,
                  track_string_literals, elf_object_paths):
  """Adds Elf section sizes and symbols.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Can be None.
    tool_prefix: Prefix for c++filt & nm.
    output_directory: Build output directory. If None, object-path and
        string-literal analysis is skipped.
    track_string_literals: Whether to break "** merge strings" symbols into
        individual string-literal symbols (needs elf_path & output_directory).
    elf_object_paths: .o/.a paths from the ELF's link command, used to seed nm
        analysis of per-object symbol names.

  Returns:
    A (section_sizes, raw_symbols) tuple.
  """
  if elf_path:
    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when mutiple .o files contain the same symbol
    # (e.g. inline functions), and to update the object_path / source_path
    # fields accordingly.
    # Looking in object files is required because the .map file choses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if output_directory:
      bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory)
      bulk_analyzer.AnalyzePaths(elf_object_paths)

  logging.info('Parsing Linker Map')
  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if elf_path:
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
    # Py2/Py3-compatible iteration (was the Python-2-only .iteritems()).
    for k, v in elf_section_sizes.items():
      if v != section_sizes.get(k):
        logging.error('ELF file and .map file do not agree on section sizes.')
        logging.error('.map file: %r', section_sizes)
        logging.error('readelf: %r', elf_section_sizes)
        sys.exit(1)

  if elf_path and output_directory:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, elf_object_paths)
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.SortPaths()
    if track_string_literals:
      merge_string_syms = [s for s in raw_symbols if
                           s.full_name == '** merge strings' or
                           s.full_name == '** lld merge strings']
      # More likely for there to be a bug in supersize than an ELF to not have a
      # single string literal.
      assert merge_string_syms
      string_positions = [(s.address, s.size) for s in merge_string_syms]
      bulk_analyzer.AnalyzeStringLiterals(elf_path, string_positions)

  logging.info('Stripping linker prefixes from symbol names')
  _StripLinkerAddedSymbolPrefixes(raw_symbols)
  # Map file for some reason doesn't demangle all names.
  # Demangle prints its own log statement.
  demangle.DemangleRemainingSymbols(raw_symbols, tool_prefix)

  if elf_path:
    logging.info(
        'Adding symbols removed by identical code folding (as reported by nm)')
    # This normally does not block (it's finished by this time).
    names_by_address = elf_nm_result.get()
    raw_symbols = _AddNmAliases(raw_symbols, names_by_address)

    if output_directory:
      object_paths_by_name = bulk_analyzer.GetSymbolNames()
      logging.debug('Fetched path information for %d symbols from %d files',
                    len(object_paths_by_name),
                    len(elf_object_paths) + len(missed_object_paths))

      # For aliases, this provides path information where there wasn't any.
      logging.info('Creating aliases for symbols shared by multiple paths')
      raw_symbols = _AssignNmAliasPathsAndCreatePathAliases(
          raw_symbols, object_paths_by_name)

      if track_string_literals:
        logging.info('Waiting for string literal extraction to complete.')
        list_of_positions_by_object_path = bulk_analyzer.GetStringPositions()
      bulk_analyzer.Close()

      if track_string_literals:
        logging.info('Deconstructing ** merge strings into literals')
        replacements = _CreateMergeStringsReplacements(
            merge_string_syms, list_of_positions_by_object_path)
        # zip (builtin) replaces the Python-2-only itertools.izip; iteration
        # behavior is identical.
        for merge_sym, literal_syms in zip(merge_string_syms, replacements):
          # Don't replace if no literals were found.
          if literal_syms:
            # Re-find the symbols since aliases cause their indices to change.
            idx = raw_symbols.index(merge_sym)
            # This assignment is a bit slow (causes array to be shifted), but
            # is fast enough since len(merge_string_syms) < 10.
            raw_symbols[idx:idx + 1] = literal_syms

  return section_sizes, raw_symbols
def CreateSizeInfo(map_path, elf_path, tool_prefix, output_directory,
                   normalize_names=True, track_string_literals=True):
  """Creates a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
        aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
    normalize_names: Whether to normalize symbol names.
    track_string_literals: Whether to break down "** merge string" sections into
        smaller symbols (requires output_directory).

  Returns:
    A models.SizeInfo built from the parsed .map / ELF data.
  """
  source_mapper = None
  if output_directory:
    # Start by finding the elf_object_paths, so that nm can run on them while
    # the linker .map is being parsed.
    logging.info('Parsing ninja files.')
    source_mapper, elf_object_paths = ninja_parser.Parse(
        output_directory, elf_path)
    logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
    assert not elf_path or elf_object_paths, (
        'Failed to find link command in ninja files for ' +
        os.path.relpath(elf_path, output_directory))

  if elf_path:
    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when mutiple .o files contain the same symbol
    # (e.g. inline functions), and to update the object_path / source_path
    # fields accordingly.
    # Looking in object files is required because the .map file choses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if output_directory:
      bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory)
      bulk_analyzer.AnalyzePaths(elf_object_paths)

  logging.info('Parsing Linker Map')
  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if elf_path:
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
    # Py2/Py3-compatible iteration (was the Python-2-only .iteritems()).
    for k, v in elf_section_sizes.items():
      if v != section_sizes.get(k):
        logging.error(
            'ELF file and .map file do not agree on section sizes.')
        logging.error('.map file: %r', section_sizes)
        logging.error('readelf: %r', elf_section_sizes)
        sys.exit(1)

  if elf_path and output_directory:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, elf_object_paths)
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.SortPaths()
    if track_string_literals:
      merge_string_syms = [
          s for s in raw_symbols if s.full_name == '** merge strings'
          or s.full_name == '** lld merge strings'
      ]
      # More likely for there to be a bug in supersize than an ELF to not have a
      # single string literal.
      assert merge_string_syms
      string_positions = [(s.address, s.size) for s in merge_string_syms]
      bulk_analyzer.AnalyzeStringLiterals(elf_path, string_positions)

  logging.info('Stripping linker prefixes from symbol names')
  _StripLinkerAddedSymbolPrefixes(raw_symbols)
  # Map file for some reason doesn't unmangle all names.
  # Unmangle prints its own log statement.
  _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

  if elf_path:
    logging.info(
        'Adding symbols removed by identical code folding (as reported by nm)')
    # This normally does not block (it's finished by this time).
    names_by_address = elf_nm_result.get()
    # NOTE(review): the return value is discarded here, while _ParseElfInfo
    # elsewhere in this file does raw_symbols = _AddNmAliases(...). Confirm
    # whether this revision's _AddNmAliases mutates in place.
    _AddNmAliases(raw_symbols, names_by_address)

    if output_directory:
      object_paths_by_name = bulk_analyzer.GetSymbolNames()
      logging.debug(
          'Fetched path information for %d symbols from %d files',
          len(object_paths_by_name),
          len(elf_object_paths) + len(missed_object_paths))

      # For aliases, this provides path information where there wasn't any.
      logging.info(
          'Creating aliases for symbols shared by multiple paths')
      raw_symbols = _AssignNmAliasPathsAndCreatePathAliases(
          raw_symbols, object_paths_by_name)

      if track_string_literals:
        logging.info(
            'Waiting for string literal extraction to complete.')
        list_of_positions_by_object_path = bulk_analyzer.GetStringPositions()
      bulk_analyzer.Close()

      if track_string_literals:
        logging.info('Deconstructing ** merge strings into literals')
        replacements = _CreateMergeStringsReplacements(
            merge_string_syms, list_of_positions_by_object_path)
        # zip (builtin) replaces the Python-2-only itertools.izip; iteration
        # behavior is identical.
        for merge_sym, literal_syms in zip(merge_string_syms, replacements):
          # Don't replace if no literals were found.
          if literal_syms:
            # Re-find the symbols since aliases cause their indices to change.
            idx = raw_symbols.index(merge_sym)
            # This assignment is a bit slow (causes array to be shifted), but
            # is fast enough since len(merge_string_syms) < 10.
            raw_symbols[idx:idx + 1] = literal_syms

  _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper)
  logging.info('Converting excessive aliases into shared-path symbols')
  _CompactLargeAliasesIntoSharedSymbols(raw_symbols)
  logging.debug('Connecting nm aliases')
  _ConnectNmAliases(raw_symbols)

  # Padding not really required, but it is useful to check for large padding and
  # log a warning.
  logging.info('Calculating padding')
  _CalculatePadding(raw_symbols)

  # Do not call _NormalizeNames() during archive since that method tends to need
  # tweaks over time. Calling it only when loading .size files allows for more
  # flexibility.
  if normalize_names:
    _NormalizeNames(raw_symbols)

  logging.info('Processed %d symbols', len(raw_symbols))
  size_info = models.SizeInfo(section_sizes, raw_symbols)
  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  return size_info