def _DoArchiveTest(self, use_output_directory=True, use_elf=True,
                   use_pak=False, debug_measures=False):
  with tempfile.NamedTemporaryFile(suffix='.size') as temp_file:
    self._DoArchive(
        temp_file.name,
        use_output_directory=use_output_directory,
        use_elf=use_elf,
        use_pak=use_pak,
        debug_measures=debug_measures)
    size_info = archive.LoadAndPostProcessSizeInfo(temp_file.name)
  # Check that saving & loading is the same as directly parsing.
  expected_size_info = self._CloneSizeInfo(
      use_output_directory=use_output_directory, use_elf=use_elf,
      use_pak=use_pak)
  self.assertEquals(expected_size_info.metadata, size_info.metadata)
  # Don't cluster.
  expected_size_info.symbols = expected_size_info.raw_symbols
  size_info.symbols = size_info.raw_symbols
  expected = list(describe.GenerateLines(expected_size_info, verbose=True))
  actual = list(describe.GenerateLines(size_info, verbose=True))
  self.assertEquals(expected, actual)

  sym_strs = (repr(sym) for sym in size_info.symbols)
  stats = describe.DescribeSizeInfoCoverage(size_info)
  if size_info.metadata:
    metadata = describe.DescribeMetadata(size_info.metadata)
  else:
    metadata = []
  return itertools.chain(metadata, stats, sym_strs)
def Run(args, parser):
  if not args.size_file.endswith('.size'):
    parser.error('size_file must end with .size')

  (output_directory, tool_prefix, apk_path, apk_so_path, elf_path,
   map_path) = DeduceMainPaths(args, parser)

  metadata = CreateMetadata(map_path, elf_path, apk_path, tool_prefix,
                            output_directory)

  apk_elf_result = None
  if apk_path and elf_path:
    # Extraction takes around 1 second, so do it in parallel.
    apk_elf_result = concurrent.ForkAndCall(
        _ElfInfoFromApk, (apk_path, apk_so_path, tool_prefix))

  section_sizes, raw_symbols = CreateSectionSizesAndSymbols(
      map_path=map_path, tool_prefix=tool_prefix, elf_path=elf_path,
      apk_path=apk_path, output_directory=output_directory,
      track_string_literals=args.track_string_literals,
      metadata=metadata, apk_elf_result=apk_elf_result,
      pak_files=args.pak_file, pak_info_file=args.pak_info_file)
  size_info = CreateSizeInfo(
      section_sizes, raw_symbols, metadata=metadata, normalize_names=False)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  logging.info('Recording metadata: \n %s',
               '\n '.join(describe.DescribeMetadata(size_info.metadata)))
  logging.info('Saving result to %s', args.size_file)
  file_format.SaveSizeInfo(size_info, args.size_file)
  size_in_mb = os.path.getsize(args.size_file) / 1024.0 / 1024.0
  logging.info('Done. File size is %.2fMiB.', size_in_mb)
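# --- Illustrative sketch (not part of the tool) ---
# concurrent.ForkAndCall above is assumed to run a function in a separate
# process and hand back a result object whose get() blocks until the value is
# ready. The helper below mimics that contract with the stdlib; the name
# fork_and_call and the single-worker pool are assumptions, not the real
# concurrent module API.
import multiprocessing

def fork_and_call(func, args):
  pool = multiprocessing.Pool(processes=1)
  async_result = pool.apply_async(func, args)
  pool.close()  # No more work will be submitted; worker exits when done.
  return async_result  # Caller later invokes async_result.get().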
def _DoArchiveTest(self, use_output_directory=True, use_elf=True,
                   debug_measures=False):
  with tempfile.NamedTemporaryFile(suffix='.size') as temp_file:
    args = [temp_file.name, '--map-file', _TEST_MAP_PATH]
    if use_output_directory:
      # Let autodetection find output_directory when --elf-file is used.
      if not use_elf:
        args += ['--output-directory', _TEST_OUTPUT_DIR]
    else:
      args += ['--no-source-paths']
    if use_elf:
      args += ['--elf-file', _TEST_ELF_PATH]
    _RunApp('archive', args, debug_measures=debug_measures)
    size_info = archive.LoadAndPostProcessSizeInfo(temp_file.name)
  # Check that saving & loading is the same as directly parsing the .map.
  expected_size_info = self._CloneSizeInfo(
      use_output_directory=use_output_directory, use_elf=use_elf)
  self.assertEquals(expected_size_info.metadata, size_info.metadata)
  # Don't cluster.
  expected_size_info.symbols = expected_size_info.raw_symbols
  size_info.symbols = size_info.raw_symbols
  expected = list(describe.GenerateLines(expected_size_info))
  actual = list(describe.GenerateLines(size_info))
  self.assertEquals(expected, actual)

  sym_strs = (repr(sym) for sym in size_info.symbols)
  stats = describe.DescribeSizeInfoCoverage(size_info)
  if size_info.metadata:
    metadata = describe.DescribeMetadata(size_info.metadata)
  else:
    metadata = []
  return itertools.chain(metadata, stats, sym_strs)
def test_Map2Size(self):
  with tempfile.NamedTemporaryFile(suffix='.size') as temp_file:
    _RunApp('map2size.py', '--output-directory', _TEST_DATA_DIR,
            '--map-file', _TEST_MAP_PATH, temp_file.name)
    size_info = map2size.Analyze(temp_file.name)
  sym_strs = (repr(sym) for sym in size_info.symbols)
  stats = describe.DescribeSizeInfoCoverage(size_info)
  return itertools.chain(stats, sym_strs)
def _SizeStats(self, size_info=None):
  """Prints some statistics for the given size info.

  Args:
    size_info: Defaults to size_infos[0].
  """
  size_info = size_info or self._size_infos[0]
  describe.WriteLines(describe.DescribeSizeInfoCoverage(size_info),
                      sys.stdout.write)
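# --- Illustrative sketch (not part of the tool) ---
# describe.WriteLines is not shown in these excerpts. Its pairing with
# sys.stdout.write above suggests it feeds each generated line to a writer
# callback; this guess at its shape is an assumption, not the real API.
def WriteLines(lines, callback):
  for line in lines:
    callback(line)
    callback('\n')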
def Analyze(path, lazy_paths=None):
  """Returns a SizeInfo for the given |path|.

  Args:
    path: Can be a .size file, or a .map(.gz). If the latter, then lazy_paths
        must be provided as well.
  """
  if path.endswith('.size'):
    logging.debug('Loading results from: %s', path)
    size_info = file_format.LoadSizeInfo(path)
    # Recompute derived values (padding and function names).
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    logging.info('Deriving signatures')
    # Re-parse out function parameters.
    _NormalizeNames(size_info.symbols)
    return size_info
  elif not path.endswith('.map') and not path.endswith('.map.gz'):
    raise Exception('Expected input to be a .map or a .size')
  else:
    # output_directory needed for source file information.
    lazy_paths.VerifyOutputDirectory()
    # tool_prefix needed for c++filt.
    lazy_paths.VerifyToolPrefix()

    with _OpenMaybeGz(path) as map_file:
      section_sizes, symbols = linker_map_parser.MapFileParser().Parse(
          map_file)
    size_info = models.SizeInfo(section_sizes, models.SymbolGroup(symbols))

    # Map file for some reason doesn't unmangle all names.
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(size_info.symbols, lazy_paths.tool_prefix)
    logging.info('Extracting source paths from .ninja files')
    all_found = _ExtractSourcePaths(size_info.symbols,
                                    lazy_paths.output_directory)
    assert all_found, (
        'One or more source file paths could not be found. Likely caused by '
        '.ninja files being generated at a different time than the .map '
        'file.')
    # Resolve paths prints its own log statement.
    logging.info('Normalizing names')
    _NormalizeNames(size_info.symbols)
    logging.info('Normalizing paths')
    _NormalizeObjectPaths(size_info.symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Finished analyzing %d symbols', len(size_info.symbols))
  return size_info
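# --- Illustrative sketch (not part of the tool) ---
# _OpenMaybeGz is referenced above but not shown. Given the .map(.gz)
# handling, it presumably dispatches on the .gz suffix; this minimal
# stand-in is an assumption about its behavior.
import gzip

def _OpenMaybeGz(path):
  if path.endswith('.gz'):
    return gzip.open(path, 'rb')
  return open(path, 'rb')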
def _DoArchiveTest(self,
                   use_output_directory=True,
                   use_elf=False,
                   use_apk=False,
                   use_minimal_apks=False,
                   use_pak=False,
                   use_aux_elf=False,
                   debug_measures=False,
                   include_padding=False):
  with tempfile.NamedTemporaryFile(suffix='.size') as temp_file:
    self._DoArchive(temp_file.name,
                    use_output_directory=use_output_directory,
                    use_elf=use_elf,
                    use_apk=use_apk,
                    use_minimal_apks=use_minimal_apks,
                    use_pak=use_pak,
                    use_aux_elf=use_aux_elf,
                    debug_measures=debug_measures,
                    include_padding=include_padding)
    size_info = archive.LoadAndPostProcessSizeInfo(temp_file.name)
  # Check that saving & loading is the same as directly parsing.
  expected_size_info = self._CloneSizeInfo(
      use_output_directory=use_output_directory,
      use_elf=use_elf,
      use_apk=use_apk,
      use_minimal_apks=use_minimal_apks,
      use_pak=use_pak,
      use_aux_elf=use_aux_elf)
  self.assertEqual(expected_size_info.metadata, size_info.metadata)
  # Don't cluster.
  expected_size_info.symbols = expected_size_info.raw_symbols
  size_info.symbols = size_info.raw_symbols
  expected = list(describe.GenerateLines(expected_size_info, verbose=True))
  actual = list(describe.GenerateLines(size_info, verbose=True))
  self.assertEqual(expected, actual)

  sym_strs = (repr(sym) for sym in size_info.symbols)
  stats = describe.DescribeSizeInfoCoverage(size_info)
  if len(size_info.containers) == 1:
    # If there's only one container, merge its metadata into build_config.
    merged_data_desc = describe.DescribeDict(size_info.metadata_legacy)
    return itertools.chain(merged_data_desc, stats, sym_strs)
  else:
    build_config = describe.DescribeDict(size_info.build_config)
    metadata = itertools.chain.from_iterable(
        describe.DescribeDict(c.metadata) for c in size_info.containers)
    return itertools.chain(build_config, metadata, stats, sym_strs)
def Analyze(path, output_directory=None, tool_prefix=''):
  if path.endswith('.size'):
    logging.debug('Loading results from: %s', path)
    size_info = file_format.LoadSizeInfo(path)
    # Recompute derived values (padding and function names).
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    logging.info('Deriving signatures')
    # Re-parse out function parameters.
    _NormalizeNames(size_info.symbols)
    return size_info
  elif not path.endswith('.map') and not path.endswith('.map.gz'):
    raise Exception('Expected input to be a .map or a .size')
  else:
    # Verify tool_prefix early.
    output_directory, tool_prefix = _DetectToolPrefix(
        tool_prefix, path, output_directory)

    with _OpenMaybeGz(path) as map_file:
      section_sizes, symbols = linker_map_parser.MapFileParser().Parse(
          map_file)
    timestamp = datetime.datetime.utcfromtimestamp(os.path.getmtime(path))
    size_info = models.SizeInfo(section_sizes, models.SymbolGroup(symbols),
                                timestamp=timestamp)

    # Map file for some reason doesn't unmangle all names.
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
    logging.info('Extracting source paths from .ninja files')
    _ExtractSourcePaths(size_info.symbols, output_directory)
    # Resolve paths prints its own log statement.
    logging.info('Normalizing names')
    _NormalizeNames(size_info.symbols)
    logging.info('Normalizing paths')
    _NormalizeObjectPaths(size_info.symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Finished analyzing %d symbols', len(size_info.symbols))
  return size_info
def CreateSizeInfo(map_path, elf_path, tool_prefix, output_directory,
                   normalize_names=True):
  """Creates a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find
        symbol aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
  """
  source_mapper = None
  if output_directory:
    # Start by finding the elf_object_paths, so that nm can run on them while
    # the linker .map is being parsed.
    logging.info('Parsing ninja files.')
    source_mapper, elf_object_paths = ninja_parser.Parse(
        output_directory, elf_path)
    logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
    assert not elf_path or elf_object_paths, (
        'Failed to find link command in ninja files for ' +
        os.path.relpath(elf_path, output_directory))

  if elf_path:
    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when multiple .o files contain the same
    # symbol (e.g. inline functions), and to update the object_path /
    # source_path fields accordingly.
    # Looking in object files is required because the .map file chooses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if output_directory:
      bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory)
      bulk_analyzer.AnalyzePaths(elf_object_paths)

  logging.info('Parsing Linker Map')
  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if elf_path:
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
    for k, v in elf_section_sizes.iteritems():
      if v != section_sizes.get(k):
        logging.error('ELF file and .map file do not agree on section sizes.')
        logging.error('.map file: %r', section_sizes)
        logging.error('readelf: %r', elf_section_sizes)
        sys.exit(1)

  if elf_path and output_directory:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, elf_object_paths)
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.Close()

  if source_mapper:
    logging.info('Looking up source paths from ninja files')
    _ExtractSourcePaths(raw_symbols, source_mapper)
    assert source_mapper.unmatched_paths_count == 0, (
        'One or more source file paths could not be found. Likely caused by '
        '.ninja files being generated at a different time than the .map '
        'file.')

  logging.info('Stripping linker prefixes from symbol names')
  _StripLinkerAddedSymbolPrefixes(raw_symbols)
  # Map file for some reason doesn't unmangle all names.
  # Unmangle prints its own log statement.
  _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

  if elf_path:
    logging.info('Adding aliased symbols, as reported by nm')
    # This normally does not block (it's finished by this time).
    aliases_by_address = elf_nm_result.get()
    _AddSymbolAliases(raw_symbols, aliases_by_address)

    if output_directory:
      # For aliases, this provides path information where there wasn't any.
      logging.info('Computing ancestor paths for inline functions and '
                   'normalizing object paths')
      object_paths_by_name = bulk_analyzer.Get()
      logging.debug('Fetched path information for %d symbols from %d files',
                    len(object_paths_by_name),
                    len(elf_object_paths) + len(missed_object_paths))
      _ComputeAncestorPathsAndNormalizeObjectPaths(
          raw_symbols, object_paths_by_name, source_mapper)

  if not elf_path or not output_directory:
    logging.info('Normalizing object paths.')
    for symbol in raw_symbols:
      symbol.object_path = _NormalizeObjectPath(symbol.object_path)

  # Padding not really required, but it is useful to check for large padding
  # and log a warning.
  logging.info('Calculating padding')
  _CalculatePadding(raw_symbols)

  # Do not call _NormalizeNames() during archive since that method tends to
  # need tweaks over time. Calling it only when loading .size files allows for
  # more flexibility.
  if normalize_names:
    _NormalizeNames(raw_symbols)

  logging.info('Processed %d symbols', len(raw_symbols))
  size_info = models.SizeInfo(section_sizes, raw_symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  return size_info
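# --- Illustrative sketch (not part of the tool) ---
# The comments above explain that identical symbols get coalesced to a single
# address and that nm reports every name at that address. Below is a minimal
# way to build an aliases_by_address mapping from raw nm output lines of the
# form '<hex-address> <type> <name>'; the function name and the line-format
# handling are assumptions, not the real nm module.
import collections

def collect_aliases_by_address(nm_lines):
  names_by_address = collections.defaultdict(list)
  for line in nm_lines:
    parts = line.rstrip().split(' ', 2)
    if len(parts) == 3:
      address, _, name = parts
      names_by_address[int(address, 16)].append(name)
  # Only addresses with multiple names correspond to folded (aliased) symbols.
  return {a: names for a, names in names_by_address.items() if len(names) > 1}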
def Run(args, parser):
  if not args.size_file.endswith('.size'):
    parser.error('size_file must end with .size')

  elf_path = args.elf_file
  map_path = args.map_file
  apk_path = args.apk_file
  pak_files = args.pak_file
  pak_info_file = args.pak_info_file
  any_input = apk_path or elf_path or map_path
  if not any_input:
    parser.error(
        'Must pass at least one of --apk-file, --elf-file, --map-file')
  output_directory_finder = path_util.OutputDirectoryFinder(
      value=args.output_directory,
      any_path_within_output_directory=any_input)

  # Initialize so the AddApkInfo() call below is safe even when elf_path
  # cannot be detected.
  apk_elf_result = None
  if apk_path:
    with zipfile.ZipFile(apk_path) as z:
      lib_infos = [f for f in z.infolist()
                   if f.filename.endswith('.so') and f.file_size > 0]
    assert lib_infos, 'APK has no .so files.'
    # TODO(agrieve): Add support for multiple .so files, and take into account
    #     secondary architectures.
    apk_so_path = max(lib_infos, key=lambda x: x.file_size).filename
    logging.debug('Sub-apk path=%s', apk_so_path)
    if not elf_path and output_directory_finder.Tentative():
      elf_path = os.path.join(
          output_directory_finder.Tentative(), 'lib.unstripped',
          os.path.basename(apk_so_path.replace('crazy.', '')))
      logging.debug('Detected --elf-file=%s', elf_path)

  if map_path:
    if not map_path.endswith('.map') and not map_path.endswith('.map.gz'):
      parser.error('Expected --map-file to end with .map or .map.gz')
  else:
    map_path = elf_path + '.map'
    if not os.path.exists(map_path):
      map_path += '.gz'
    if not os.path.exists(map_path):
      parser.error('Could not find .map(.gz)? file. Ensure you have built '
                   'with is_official_build=true, or use --map-file to point '
                   'to a linker map file.')

  linker_name = _DetectLinkerName(map_path)
  tool_prefix_finder = path_util.ToolPrefixFinder(
      value=args.tool_prefix,
      output_directory_finder=output_directory_finder,
      linker_name=linker_name)
  tool_prefix = tool_prefix_finder.Finalized()
  output_directory = None
  if not args.no_source_paths:
    output_directory = output_directory_finder.Finalized()

  metadata = CreateMetadata(map_path, elf_path, apk_path, tool_prefix,
                            output_directory)
  if apk_path and elf_path:
    # Extraction takes around 1 second, so do it in parallel.
    apk_elf_result = concurrent.ForkAndCall(
        _ElfInfoFromApk, (apk_path, apk_so_path, tool_prefix))

  section_sizes, raw_symbols = CreateSectionSizesAndSymbols(
      map_path, elf_path, tool_prefix, output_directory,
      track_string_literals=args.track_string_literals)
  if apk_path:
    AddApkInfo(section_sizes, raw_symbols, apk_path, output_directory,
               metadata, apk_elf_result)
  elif pak_files and pak_info_file:
    AddPakSymbolsFromFiles(
        section_sizes, raw_symbols, pak_files, pak_info_file)

  size_info = CreateSizeInfo(
      section_sizes, raw_symbols, metadata=metadata, normalize_names=False)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  logging.info('Recording metadata: \n %s',
               '\n '.join(describe.DescribeMetadata(size_info.metadata)))
  logging.info('Saving result to %s', args.size_file)
  file_format.SaveSizeInfo(size_info, args.size_file)
  size_in_mb = os.path.getsize(args.size_file) / 1024.0 / 1024.0
  logging.info('Done. File size is %.2fMiB.', size_in_mb)
def CreateSizeInfo(map_path, elf_path, tool_prefix, output_directory,
                   normalize_names=True, track_string_literals=True):
  """Creates a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find
        symbol aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
    normalize_names: Whether to normalize symbol names.
    track_string_literals: Whether to break down "** merge string" sections
        into smaller symbols (requires output_directory).
  """
  source_mapper = None
  if output_directory:
    # Start by finding the elf_object_paths, so that nm can run on them while
    # the linker .map is being parsed.
    logging.info('Parsing ninja files.')
    source_mapper, elf_object_paths = ninja_parser.Parse(
        output_directory, elf_path)
    logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
    assert not elf_path or elf_object_paths, (
        'Failed to find link command in ninja files for ' +
        os.path.relpath(elf_path, output_directory))

  if elf_path:
    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when multiple .o files contain the same
    # symbol (e.g. inline functions), and to update the object_path /
    # source_path fields accordingly.
    # Looking in object files is required because the .map file chooses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if output_directory:
      bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory)
      bulk_analyzer.AnalyzePaths(elf_object_paths)

  logging.info('Parsing Linker Map')
  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if elf_path:
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
    for k, v in elf_section_sizes.iteritems():
      if v != section_sizes.get(k):
        logging.error('ELF file and .map file do not agree on section sizes.')
        logging.error('.map file: %r', section_sizes)
        logging.error('readelf: %r', elf_section_sizes)
        sys.exit(1)

  if elf_path and output_directory:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, elf_object_paths)
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.SortPaths()

    if track_string_literals:
      merge_string_syms = [s for s in raw_symbols
                           if s.full_name == '** merge strings' or
                           s.full_name == '** lld merge strings']
      # It is more likely that there is a bug in supersize than that an ELF
      # contains not a single string literal.
      assert merge_string_syms
      string_positions = [(s.address, s.size) for s in merge_string_syms]
      bulk_analyzer.AnalyzeStringLiterals(elf_path, string_positions)

  logging.info('Stripping linker prefixes from symbol names')
  _StripLinkerAddedSymbolPrefixes(raw_symbols)
  # Map file for some reason doesn't unmangle all names.
  # Unmangle prints its own log statement.
  _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

  if elf_path:
    logging.info(
        'Adding symbols removed by identical code folding (as reported by '
        'nm)')
    # This normally does not block (it's finished by this time).
    names_by_address = elf_nm_result.get()
    _AddNmAliases(raw_symbols, names_by_address)

    if output_directory:
      object_paths_by_name = bulk_analyzer.GetSymbolNames()
      logging.debug('Fetched path information for %d symbols from %d files',
                    len(object_paths_by_name),
                    len(elf_object_paths) + len(missed_object_paths))

      # For aliases, this provides path information where there wasn't any.
      logging.info('Creating aliases for symbols shared by multiple paths')
      raw_symbols = _AssignNmAliasPathsAndCreatePathAliases(
          raw_symbols, object_paths_by_name)

      if track_string_literals:
        logging.info('Waiting for string literal extraction to complete.')
        list_of_positions_by_object_path = bulk_analyzer.GetStringPositions()
      bulk_analyzer.Close()

      if track_string_literals:
        logging.info('Deconstructing ** merge strings into literals')
        replacements = _CreateMergeStringsReplacements(
            merge_string_syms, list_of_positions_by_object_path)
        for merge_sym, literal_syms in itertools.izip(
            merge_string_syms, replacements):
          # Don't replace if no literals were found.
          if literal_syms:
            # Re-find the symbols since aliases cause their indices to change.
            idx = raw_symbols.index(merge_sym)
            # This assignment is a bit slow (causes array to be shifted), but
            # is fast enough since len(merge_string_syms) < 10.
            raw_symbols[idx:idx + 1] = literal_syms

  _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper)
  logging.info('Converting excessive aliases into shared-path symbols')
  _CompactLargeAliasesIntoSharedSymbols(raw_symbols)
  logging.debug('Connecting nm aliases')
  _ConnectNmAliases(raw_symbols)

  # Padding not really required, but it is useful to check for large padding
  # and log a warning.
  logging.info('Calculating padding')
  _CalculatePadding(raw_symbols)

  # Do not call _NormalizeNames() during archive since that method tends to
  # need tweaks over time. Calling it only when loading .size files allows for
  # more flexibility.
  if normalize_names:
    _NormalizeNames(raw_symbols)

  logging.info('Processed %d symbols', len(raw_symbols))
  size_info = models.SizeInfo(section_sizes, raw_symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  return size_info
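# --- Illustrative sketch (not part of the tool) ---
# The merge-strings replacement above relies on Python list slice assignment
# to swap one element for many in place. The same technique in isolation,
# with made-up symbol names:
symbols = ['sym_a', '** merge strings', 'sym_z']
literals = ['"foo"', '"bar"', '"baz"']
idx = symbols.index('** merge strings')
# Replaces the single element at idx with the three literals; elements after
# idx shift right, which is cheap here since such symbols are rare.
symbols[idx:idx + 1] = literals
assert symbols == ['sym_a', '"foo"', '"bar"', '"baz"', 'sym_z']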