def _CreateSizeInfo(aliases=None, containers=None):
  build_config = {}
  metadata = {}
  section_sizes = {'.text': 100, '.bss': 40}
  if not containers:
    containers = [
        models.Container('', metadata=metadata, section_sizes=section_sizes)
    ]
  models.BaseContainer.AssignShortNames(containers)
  TEXT = models.SECTION_TEXT
  symbols = [
      _MakeSym(models.SECTION_DEX_METHOD, 10, 'a', 'com.Foo#bar()'),
      _MakeSym(TEXT, 20, 'a', '.Lfoo'),
      _MakeSym(TEXT, 30, 'b'),
      _MakeSym(TEXT, 40, 'b'),
      _MakeSym(TEXT, 50, 'b'),
      _MakeSym(TEXT, 60, ''),
  ]
  for s in symbols:
    s.container = containers[0]
  if aliases:
    for tup in aliases:
      syms = symbols[tup[0]:tup[1]]
      for sym in syms:
        sym.aliases = syms
  return models.SizeInfo(build_config, containers, symbols)

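# A minimal sketch (not part of the original test file) of how the |aliases|
# parameter above is used: each (start, end) tuple selects a slice of
# |symbols| that becomes one alias group, with every symbol in the slice
# sharing the same |aliases| list. Assumes models.SizeInfo exposes the
# symbols via a |raw_symbols| attribute.
def _ExampleAliasUsage():
  size_info = _CreateSizeInfo(aliases=[(0, 2), (2, 5)])
  first_group = size_info.raw_symbols[0].aliases
  assert len(first_group) == 2  # symbols[0] and symbols[1] alias each other.
  return size_info
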
def _CreateSizeInfo(aliases=None):
  build_config = {}
  metadata = {}
  section_sizes = {'.text': 100, '.bss': 40}
  containers = [
      models.Container(name='', metadata=metadata, section_sizes=section_sizes)
  ]
  TEXT = models.SECTION_TEXT
  symbols = [
      _MakeSym(models.SECTION_DEX_METHOD, 10, 'a', 'com.Foo#bar()'),
      _MakeSym(TEXT, 20, 'a', '.Lfoo'),
      _MakeSym(TEXT, 30, 'b'),
      _MakeSym(TEXT, 40, 'b'),
      _MakeSym(TEXT, 50, 'b'),
      _MakeSym(TEXT, 60, ''),
  ]
  # For simplicity, not associating |symbols| with |containers|.
  if aliases:
    for tup in aliases:
      syms = symbols[tup[0]:tup[1]]
      for sym in syms:
        sym.aliases = syms
  return models.SizeInfo(build_config, containers, symbols)

def Analyze(path, lazy_paths=None):
  """Returns a SizeInfo for the given |path|.

  Args:
    path: Can be a .size file, or a .map(.gz). If the latter, then lazy_paths
        must be provided as well.
  """
  if path.endswith('.size'):
    logging.debug('Loading results from: %s', path)
    size_info = file_format.LoadSizeInfo(path)
    # Recompute derived values (padding and function names).
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    logging.info('Deriving signatures')
    # Re-parse out function parameters.
    _NormalizeNames(size_info.symbols)
    return size_info
  elif not path.endswith('.map') and not path.endswith('.map.gz'):
    raise Exception('Expected input to be a .map or a .size')
  else:
    # output_directory needed for source file information.
    lazy_paths.VerifyOutputDirectory()
    # tool_prefix needed for c++filt.
    lazy_paths.VerifyToolPrefix()

    with _OpenMaybeGz(path) as map_file:
      section_sizes, symbols = linker_map_parser.MapFileParser().Parse(
          map_file)
    size_info = models.SizeInfo(section_sizes, models.SymbolGroup(symbols))

    # Map file for some reason doesn't unmangle all names.
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(size_info.symbols, lazy_paths.tool_prefix)
    logging.info('Extracting source paths from .ninja files')
    all_found = _ExtractSourcePaths(size_info.symbols,
                                    lazy_paths.output_directory)
    assert all_found, (
        'One or more source file paths could not be found. Likely caused by '
        '.ninja files being generated at a different time than the .map file.')
    # Resolve paths prints its own log statement.
    logging.info('Normalizing names')
    _NormalizeNames(size_info.symbols)
    logging.info('Normalizing paths')
    _NormalizeObjectPaths(size_info.symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Finished analyzing %d symbols', len(size_info.symbols))
  return size_info

def _CreateSizeInfo(aliases=None):
  section_sizes = {'.text': 100, '.bss': 40}
  TEXT = models.SECTION_TEXT
  symbols = [
      _MakeSym(models.SECTION_DEX_METHOD, 10, 'a', 'com.Foo#bar()'),
      _MakeSym(TEXT, 20, 'a', '.Lfoo'),
      _MakeSym(TEXT, 30, 'b'),
      _MakeSym(TEXT, 40, 'b'),
      _MakeSym(TEXT, 50, 'b'),
      _MakeSym(TEXT, 60, ''),
  ]
  if aliases:
    for tup in aliases:
      syms = symbols[tup[0]:tup[1]]
      for sym in syms:
        sym.aliases = syms
  return models.SizeInfo(section_sizes, symbols)

def CreateSizeInfo(section_sizes, raw_symbols, metadata=None,
                   normalize_names=True):
  """Performs operations on all symbols and creates a SizeInfo object."""
  # Padding is not strictly required, but it is useful to check for large
  # padding and log a warning.
  logging.info('Calculating padding')
  _CalculatePadding(raw_symbols)

  # Do not call _NormalizeNames() during archive since that method tends to
  # need tweaks over time. Calling it only when loading .size files allows for
  # more flexibility.
  if normalize_names:
    _NormalizeNames(raw_symbols)

  raw_symbols.sort(key=lambda s: (s.IsPak(), s.IsBss(), s.section_name,
                                  s.address))
  logging.info('Processed %d symbols', len(raw_symbols))
  return models.SizeInfo(section_sizes, raw_symbols, metadata=metadata)

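# A short illustration (not from the original) of the sort key used above:
# False sorts before True, so pak and .bss symbols sink to the end, and the
# remaining symbols are grouped by section name and ordered by address.
def _ExampleSortKeyOrdering():
  keys = [(False, False, '.text', 20), (True, False, '.pak', 0),
          (False, True, '.bss', 0), (False, False, '.text', 10)]
  # Sorted: .text@10, .text@20, then the .bss entry, then the pak entry.
  return sorted(keys)
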
def Analyze(path, output_directory=None, tool_prefix=''):
  if path.endswith('.size'):
    logging.debug('Loading results from: %s', path)
    size_info = file_format.LoadSizeInfo(path)
    # Recompute derived values (padding and function names).
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    logging.info('Deriving signatures')
    # Re-parse out function parameters.
    _NormalizeNames(size_info.symbols)
    return size_info
  elif not path.endswith('.map') and not path.endswith('.map.gz'):
    raise Exception('Expected input to be a .map or a .size')
  else:
    # Verify tool_prefix early.
    output_directory, tool_prefix = (
        _DetectToolPrefix(tool_prefix, path, output_directory))

    with _OpenMaybeGz(path) as map_file:
      section_sizes, symbols = linker_map_parser.MapFileParser().Parse(
          map_file)
    timestamp = datetime.datetime.utcfromtimestamp(os.path.getmtime(path))
    size_info = models.SizeInfo(section_sizes, models.SymbolGroup(symbols),
                                timestamp=timestamp)

    # Map file for some reason doesn't unmangle all names.
    logging.info('Calculating padding')
    _RemoveDuplicatesAndCalculatePadding(size_info.symbols)
    # Unmangle prints its own log statement.
    _UnmangleRemainingSymbols(size_info.symbols, tool_prefix)
    logging.info('Extracting source paths from .ninja files')
    _ExtractSourcePaths(size_info.symbols, output_directory)
    # Resolve paths prints its own log statement.
    logging.info('Normalizing names')
    _NormalizeNames(size_info.symbols)
    logging.info('Normalizing paths')
    _NormalizeObjectPaths(size_info.symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Finished analyzing %d symbols', len(size_info.symbols))
  return size_info

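# Hypothetical call sites for Analyze() above, showing both accepted inputs
# (paths are invented for illustration):
#
#   size_info = Analyze('chrome.size')  # Load a previously saved .size file.
#   size_info = Analyze('libchrome.so.map.gz', output_directory='out/Release')
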
def _LoadSizeInfoFromFile(file_obj, size_path):
  """Loads a size_info from the given file.

  See _SaveSizeInfoToFile() for details on the .size file format.

  Args:
    file_obj: File to read, should be a GzipFile.
  """
  lines = iter(file_obj)
  _ReadLine(lines)  # Line 0: "Created by supersize" header.
  actual_version = _ReadLine(lines)
  assert actual_version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')

  # JSON metadata.
  json_len = int(_ReadLine(lines))
  json_str = file_obj.read(json_len)
  headers = json.loads(json_str)
  section_sizes = headers['section_sizes']
  metadata = headers.get('metadata')
  has_components = headers.get('has_components', False)
  lines = iter(file_obj)
  _ReadLine(lines)

  # Path list.
  num_path_tuples = int(_ReadLine(lines))  # Line 4: number of paths in list.
  # Read the path list values and store for later.
  path_tuples = [
      _ReadValuesFromLine(lines, split='\t') for _ in xrange(num_path_tuples)
  ]

  # Component list.
  if has_components:
    num_components = int(_ReadLine(lines))  # Number of components in list.
    components = [_ReadLine(lines) for _ in xrange(num_components)]

  # Symbol counts by section.
  section_names = _ReadValuesFromLine(lines, split='\t')
  section_counts = [int(c) for c in _ReadValuesFromLine(lines, split='\t')]

  # Addresses, sizes, path indices, component indices.
  def read_numeric(delta=False):
    """Reads numeric values, where each line corresponds to a symbol group.

    The values in each line are space separated. If |delta| is True, each
    number is read as a value to add to the sum of the prior values in the
    line, i.e. as the amount to change by.
    """
    ret = []
    delta_multiplier = int(delta)
    for _ in section_counts:
      value = 0
      fields = []
      for f in _ReadValuesFromLine(lines, split=' '):
        value = value * delta_multiplier + int(f)
        fields.append(value)
      ret.append(fields)
    return ret

  addresses = read_numeric(delta=True)
  sizes = read_numeric(delta=False)
  path_indices = read_numeric(delta=True)
  if has_components:
    component_indices = read_numeric(delta=True)
  else:
    component_indices = [None] * len(section_names)

  raw_symbols = [None] * sum(section_counts)
  symbol_idx = 0
  for (cur_section_name, cur_section_count, cur_addresses, cur_sizes,
       cur_path_indices, cur_component_indices) in itertools.izip(
           section_names, section_counts, addresses, sizes, path_indices,
           component_indices):
    alias_counter = 0
    for i in xrange(cur_section_count):
      parts = _ReadValuesFromLine(lines, split='\t')
      full_name = parts[0]
      flags_part = None
      aliases_part = None
      # aliases_part or flags_part may have been omitted.
      if len(parts) == 3:
        # full_name  aliases_part  flags_part
        aliases_part = parts[1]
        flags_part = parts[2]
      elif len(parts) == 2:
        if parts[1][0] == '0':
          # full_name  aliases_part
          aliases_part = parts[1]
        else:
          # full_name  flags_part
          flags_part = parts[1]

      # Use a bit less RAM by using the same instance for this common string.
      if full_name == models.STRING_LITERAL_NAME:
        full_name = models.STRING_LITERAL_NAME

      flags = int(flags_part, 16) if flags_part else 0
      num_aliases = int(aliases_part, 16) if aliases_part else 0

      # Skip the constructor to avoid default value checks.
      new_sym = models.Symbol.__new__(models.Symbol)
      new_sym.section_name = cur_section_name
      new_sym.full_name = full_name
      new_sym.address = cur_addresses[i]
      new_sym.size = cur_sizes[i]
      paths = path_tuples[cur_path_indices[i]]
      new_sym.object_path, new_sym.source_path = paths
      component = components[cur_component_indices[i]] if has_components else ''
      new_sym.component = component
      new_sym.flags = flags
      # Derived.
      new_sym.padding = 0
      new_sym.template_name = ''
      new_sym.name = ''

      if num_aliases:
        assert alias_counter == 0
        new_sym.aliases = [new_sym]
        alias_counter = num_aliases - 1
      elif alias_counter > 0:
        new_sym.aliases = raw_symbols[symbol_idx - 1].aliases
        new_sym.aliases.append(new_sym)
        alias_counter -= 1
      else:
        new_sym.aliases = None

      raw_symbols[symbol_idx] = new_sym
      symbol_idx += 1

  return models.SizeInfo(section_sizes, raw_symbols, metadata=metadata,
                         size_path=size_path)

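# A standalone sketch (not from the original file) of the delta encoding that
# read_numeric(delta=True) above undoes: each stored field is the difference
# from the running total, which keeps large monotonic values (like addresses)
# small and compressible.
def _DecodeDeltas(fields):
  """E.g. ['100', '8', '8'] decodes to [100, 108, 116]."""
  value = 0
  result = []
  for f in fields:
    value += int(f)
    result.append(value)
  return result

assert _DecodeDeltas(['100', '8', '8']) == [100, 108, 116]
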
def _LoadSizeInfoFromFile(file_obj, size_path):
  """Loads a size_info from the given file.

  See _SaveSizeInfoToFile() for details on the .size file format.

  Args:
    file_obj: File to read, should be a GzipFile.
  """
  # Split lines on '\n', since '\r' can appear in some lines!
  lines = io.TextIOWrapper(file_obj, newline='\n')
  header_line = _ReadLine(lines).encode('ascii')
  assert header_line == _COMMON_HEADER[:-1], 'was ' + str(header_line)
  header_line = _ReadLine(lines).encode('ascii')
  if header_line == _SIZE_HEADER_SINGLE_CONTAINER[:-1]:
    has_multi_containers = False
  elif header_line == _SIZE_HEADER_MULTI_CONTAINER[:-1]:
    has_multi_containers = True
  else:
    raise ValueError('Version mismatch. Need to write some upgrade code.')

  # JSON header fields.
  json_len = int(_ReadLine(lines))
  json_str = lines.read(json_len)
  fields = json.loads(json_str)

  assert ('containers' in fields) == has_multi_containers
  assert ('build_config' in fields) == has_multi_containers
  assert ('metadata' not in fields) == has_multi_containers
  assert ('section_sizes' not in fields) == has_multi_containers

  containers = []
  if has_multi_containers:  # New format.
    build_config = fields['build_config']
    for cfield in fields['containers']:
      c = models.Container(name=cfield['name'],
                           metadata=cfield['metadata'],
                           section_sizes=cfield['section_sizes'])
      containers.append(c)
  else:  # Old format.
    build_config = {}
    metadata = fields.get('metadata')
    if metadata:
      for key in models.BUILD_CONFIG_KEYS:
        if key in metadata:
          build_config[key] = metadata[key]
          del metadata[key]
    section_sizes = fields['section_sizes']
    containers.append(
        models.Container(name='', metadata=metadata,
                         section_sizes=section_sizes))
  models.Container.AssignShortNames(containers)

  has_components = fields.get('has_components', False)
  has_padding = fields.get('has_padding', False)

  # Eat empty line.
  _ReadLine(lines)

  # Path list.
  num_path_tuples = int(_ReadLine(lines))  # Number of paths in list.
  # Read the path list values and store for later.
  path_tuples = [
      _ReadValuesFromLine(lines, split='\t') for _ in range(num_path_tuples)
  ]
  if num_path_tuples == 0:
    logging.warning('File contains no symbols: %s', size_path)
    return models.SizeInfo(build_config, containers, [], size_path=size_path)

  # Component list.
  if has_components:
    num_components = int(_ReadLine(lines))  # Number of components in list.
    components = [_ReadLine(lines) for _ in range(num_components)]

  # Symbol counts by "segment", where a segment is a (container, section)
  # tuple.
  segment_names = _ReadValuesFromLine(lines, split='\t')
  symbol_counts = [int(c) for c in _ReadValuesFromLine(lines, split='\t')]

  # Addresses, sizes, paddings, path indices, component indices.
  def read_numeric(delta=False):
    """Reads numeric values, where each line corresponds to a symbol group.

    The values in each line are space separated. If |delta| is True, each
    number is read as a value to add to the sum of the prior values in the
    line, i.e. as the amount to change by.
    """
    ret = []
    delta_multiplier = int(delta)
    for _ in symbol_counts:
      value = 0
      fields = []
      for f in _ReadValuesFromLine(lines, split=' '):
        value = value * delta_multiplier + int(f)
        fields.append(value)
      ret.append(fields)
    return ret

  addresses = read_numeric(delta=True)
  sizes = read_numeric(delta=False)
  if has_padding:
    paddings = read_numeric(delta=False)
  else:
    paddings = [None] * len(segment_names)
  path_indices = read_numeric(delta=True)
  if has_components:
    component_indices = read_numeric(delta=True)
  else:
    component_indices = [None] * len(segment_names)

  raw_symbols = [None] * sum(symbol_counts)
  symbol_idx = 0
  for (cur_segment_name, cur_symbol_count, cur_addresses, cur_sizes,
       cur_paddings, cur_path_indices,
       cur_component_indices) in zip(segment_names, symbol_counts, addresses,
                                     sizes, paddings, path_indices,
                                     component_indices):
    if has_multi_containers:
      # Extract '<cur_container_idx_str>cur_section_name'.
      assert cur_segment_name.startswith('<')
      cur_container_idx_str, cur_section_name = (
          cur_segment_name[1:].split('>', 1))
      cur_container = containers[int(cur_container_idx_str)]
    else:
      cur_section_name = cur_segment_name
      cur_container = containers[0]

    alias_counter = 0
    for i in range(cur_symbol_count):
      parts = _ReadValuesFromLine(lines, split='\t')
      full_name = parts[0]
      flags_part = None
      aliases_part = None
      # aliases_part or flags_part may have been omitted.
      if len(parts) == 3:
        # full_name  aliases_part  flags_part
        aliases_part = parts[1]
        flags_part = parts[2]
      elif len(parts) == 2:
        if parts[1][0] == '0':
          # full_name  aliases_part
          aliases_part = parts[1]
        else:
          # full_name  flags_part
          flags_part = parts[1]

      # Use a bit less RAM by using the same instance for this common string.
      if full_name == models.STRING_LITERAL_NAME:
        full_name = models.STRING_LITERAL_NAME

      flags = int(flags_part, 16) if flags_part else 0
      num_aliases = int(aliases_part, 16) if aliases_part else 0

      # Skip the constructor to avoid default value checks.
      new_sym = models.Symbol.__new__(models.Symbol)
      new_sym.container = cur_container
      new_sym.section_name = cur_section_name
      new_sym.full_name = full_name
      new_sym.address = cur_addresses[i]
      new_sym.size = cur_sizes[i]
      paths = path_tuples[cur_path_indices[i]]
      new_sym.object_path, new_sym.source_path = paths
      component = components[cur_component_indices[i]] if has_components else ''
      new_sym.component = component
      new_sym.flags = flags
      # Derived.
      if cur_paddings:
        new_sym.padding = cur_paddings[i]
        if not new_sym.IsOverhead():
          new_sym.size += new_sym.padding
      else:
        new_sym.padding = 0  # Computed below.
      new_sym.template_name = ''
      new_sym.name = ''

      if num_aliases:
        assert alias_counter == 0
        new_sym.aliases = [new_sym]
        alias_counter = num_aliases - 1
      elif alias_counter > 0:
        new_sym.aliases = raw_symbols[symbol_idx - 1].aliases
        new_sym.aliases.append(new_sym)
        alias_counter -= 1
      else:
        new_sym.aliases = None

      raw_symbols[symbol_idx] = new_sym
      symbol_idx += 1

  if not has_padding:
    CalculatePadding(raw_symbols)

  return models.SizeInfo(build_config, containers, raw_symbols,
                         size_path=size_path)

def _LoadSizeInfoFromFile(file_obj):
  """Loads a size_info from the given file."""
  lines = iter(file_obj)
  next(lines)  # Comment line.
  actual_version = next(lines)[:-1]
  assert actual_version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')
  json_len = int(next(lines))
  json_str = file_obj.read(json_len)
  headers = json.loads(json_str)
  section_sizes = headers['section_sizes']
  metadata = headers.get('metadata')

  lines = iter(file_obj)
  next(lines)  # Newline after closing } of JSON.

  num_path_tuples = int(next(lines))
  path_tuples = [None] * num_path_tuples
  for i in xrange(num_path_tuples):
    path_tuples[i] = next(lines)[:-1].split('\t')

  section_names = next(lines)[:-1].split('\t')
  section_counts = [int(c) for c in next(lines)[:-1].split('\t')]

  def read_numeric(delta=False):
    ret = []
    delta_multiplier = int(delta)
    for _ in section_counts:
      value = 0
      fields = next(lines).split(' ')
      for i, f in enumerate(fields):
        value = value * delta_multiplier + int(f)
        fields[i] = value
      ret.append(fields)
    return ret

  addresses = read_numeric(delta=True)
  sizes = read_numeric(delta=False)
  path_indices = read_numeric(delta=True)

  symbol_list = [None] * sum(section_counts)
  symbol_idx = 0
  for section_index, cur_section_name in enumerate(section_names):
    for i in xrange(section_counts[section_index]):
      line = next(lines)[:-1]
      is_anonymous = line.endswith('\t1')
      name = line[:-2] if is_anonymous else line
      new_sym = models.Symbol.__new__(models.Symbol)
      new_sym.section_name = cur_section_name
      new_sym.address = addresses[section_index][i]
      new_sym.size = sizes[section_index][i]
      new_sym.name = name
      paths = path_tuples[path_indices[section_index][i]]
      new_sym.object_path = paths[0]
      new_sym.source_path = paths[1]
      new_sym.is_anonymous = is_anonymous
      new_sym.padding = 0  # Derived.
      new_sym.full_name = None  # Derived.
      symbol_list[symbol_idx] = new_sym
      symbol_idx += 1

  symbols = models.SymbolGroup(symbol_list)
  return models.SizeInfo(section_sizes, symbols, metadata=metadata)

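# The per-symbol line format decoded above, for reference (examples invented):
# a trailing '\t1' marks an anonymous symbol and is stripped from the name.
#
#   'foo()\t1'  -> name='foo()', is_anonymous=True
#   'bar()'     -> name='bar()', is_anonymous=False
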
def CreateSizeInfo(map_path, elf_path, tool_prefix, output_directory,
                   normalize_names=True):
  """Creates a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find
        symbol aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
  """
  source_mapper = None
  if output_directory:
    # Start by finding the elf_object_paths, so that nm can run on them while
    # the linker .map is being parsed.
    logging.info('Parsing ninja files.')
    source_mapper, elf_object_paths = ninja_parser.Parse(
        output_directory, elf_path)
    logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
    assert not elf_path or elf_object_paths, (
        'Failed to find link command in ninja files for ' +
        os.path.relpath(elf_path, output_directory))

  if elf_path:
    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when multiple .o files contain the same
    # symbol (e.g. inline functions), and to update the object_path /
    # source_path fields accordingly.
    # Looking in object files is required because the .map file chooses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if output_directory:
      bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory)
      bulk_analyzer.AnalyzePaths(elf_object_paths)

  logging.info('Parsing Linker Map')
  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if elf_path:
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
    for k, v in elf_section_sizes.iteritems():
      if v != section_sizes.get(k):
        logging.error('ELF file and .map file do not agree on section sizes.')
        logging.error('.map file: %r', section_sizes)
        logging.error('readelf: %r', elf_section_sizes)
        sys.exit(1)

  if elf_path and output_directory:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, elf_object_paths)
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.Close()

  if source_mapper:
    logging.info('Looking up source paths from ninja files')
    _ExtractSourcePaths(raw_symbols, source_mapper)
    assert source_mapper.unmatched_paths_count == 0, (
        'One or more source file paths could not be found. Likely caused by '
        '.ninja files being generated at a different time than the .map file.')

  logging.info('Stripping linker prefixes from symbol names')
  _StripLinkerAddedSymbolPrefixes(raw_symbols)
  # Map file for some reason doesn't unmangle all names.
  # Unmangle prints its own log statement.
  _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

  if elf_path:
    logging.info('Adding aliased symbols, as reported by nm')
    # This normally does not block (it's finished by this time).
    aliases_by_address = elf_nm_result.get()
    _AddSymbolAliases(raw_symbols, aliases_by_address)

    if output_directory:
      # For aliases, this provides path information where there wasn't any.
      logging.info('Computing ancestor paths for inline functions and '
                   'normalizing object paths')
      object_paths_by_name = bulk_analyzer.Get()
      logging.debug('Fetched path information for %d symbols from %d files',
                    len(object_paths_by_name),
                    len(elf_object_paths) + len(missed_object_paths))
      _ComputeAncestorPathsAndNormalizeObjectPaths(
          raw_symbols, object_paths_by_name, source_mapper)

  if not elf_path or not output_directory:
    logging.info('Normalizing object paths.')
    for symbol in raw_symbols:
      symbol.object_path = _NormalizeObjectPath(symbol.object_path)

  # Padding is not strictly required, but it is useful to check for large
  # padding and log a warning.
  logging.info('Calculating padding')
  _CalculatePadding(raw_symbols)

  # Do not call _NormalizeNames() during archive since that method tends to
  # need tweaks over time. Calling it only when loading .size files allows for
  # more flexibility.
  if normalize_names:
    _NormalizeNames(raw_symbols)

  logging.info('Processed %d symbols', len(raw_symbols))
  size_info = models.SizeInfo(section_sizes, raw_symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  return size_info

def _LoadSizeInfoFromFile(file_obj, size_path):
  """Loads a size_info from the given file."""
  lines = iter(file_obj)
  next(lines)  # Comment line.
  actual_version = next(lines)[:-1]
  assert actual_version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')
  json_len = int(next(lines))
  json_str = file_obj.read(json_len)
  headers = json.loads(json_str)
  section_sizes = headers['section_sizes']
  metadata = headers.get('metadata')

  lines = iter(file_obj)
  next(lines)  # Newline after closing } of JSON.

  num_path_tuples = int(next(lines))
  path_tuples = [None] * num_path_tuples
  for i in xrange(num_path_tuples):
    path_tuples[i] = next(lines)[:-1].split('\t')

  section_names = next(lines)[:-1].split('\t')
  section_counts = [int(c) for c in next(lines)[:-1].split('\t')]

  def read_numeric(delta=False):
    ret = []
    delta_multiplier = int(delta)
    for _ in section_counts:
      value = 0
      fields = next(lines).split(' ')
      for i, f in enumerate(fields):
        value = value * delta_multiplier + int(f)
        fields[i] = value
      ret.append(fields)
    return ret

  addresses = read_numeric(delta=True)
  sizes = read_numeric(delta=False)
  path_indices = read_numeric(delta=True)

  raw_symbols = [None] * sum(section_counts)
  symbol_idx = 0
  for section_index, cur_section_name in enumerate(section_names):
    alias_counter = 0
    for i in xrange(section_counts[section_index]):
      parts = next(lines)[:-1].split('\t')
      flags_part = None
      aliases_part = None
      if len(parts) == 3:
        aliases_part = parts[1]
        flags_part = parts[2]
      elif len(parts) == 2:
        if parts[1][0] == '0':
          aliases_part = parts[1]
        else:
          flags_part = parts[1]
      full_name = parts[0]
      # Use a bit less RAM by using the same instance for this common string.
      if full_name == models.STRING_LITERAL_NAME:
        full_name = models.STRING_LITERAL_NAME
      flags = int(flags_part, 16) if flags_part else 0
      num_aliases = int(aliases_part, 16) if aliases_part else 0

      new_sym = models.Symbol.__new__(models.Symbol)
      new_sym.section_name = cur_section_name
      new_sym.address = addresses[section_index][i]
      new_sym.size = sizes[section_index][i]
      new_sym.full_name = full_name
      paths = path_tuples[path_indices[section_index][i]]
      new_sym.object_path = paths[0]
      new_sym.source_path = paths[1]
      new_sym.flags = flags
      new_sym.padding = 0  # Derived.
      new_sym.template_name = ''  # Derived.
      new_sym.name = ''  # Derived.

      if num_aliases:
        assert alias_counter == 0
        new_sym.aliases = [new_sym]
        alias_counter = num_aliases - 1
      elif alias_counter > 0:
        new_sym.aliases = raw_symbols[symbol_idx - 1].aliases
        new_sym.aliases.append(new_sym)
        alias_counter -= 1
      else:
        new_sym.aliases = None

      raw_symbols[symbol_idx] = new_sym
      symbol_idx += 1

  return models.SizeInfo(section_sizes, raw_symbols, metadata=metadata,
                         size_path=size_path)

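# A sketch (not from the original) of the alias run-length encoding the loader
# above expands: a row storing num_aliases=N opens a group, and the next N-1
# rows append themselves to the same shared list, so the count appears only
# once per group.
def _ExpandAliasGroups(rows):
  """|rows| is a list of (name, num_aliases); returns one shared list per row."""
  result = []
  alias_counter = 0
  group = None
  for name, num_aliases in rows:
    if num_aliases:
      assert alias_counter == 0
      group = [name]
      alias_counter = num_aliases - 1
    elif alias_counter > 0:
      group.append(name)
      alias_counter -= 1
    else:
      group = None
    result.append(group)
  return result

# E.g. [('a', 2), ('b', 0), ('c', 0)] -> [['a', 'b'], ['a', 'b'], None].
assert _ExpandAliasGroups([('a', 2), ('b', 0), ('c', 0)]) == [
    ['a', 'b'], ['a', 'b'], None
]
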
def _LoadSizeInfoFromFile(file_obj, size_path):
  """Loads a size_info from the given file.

  See _SaveSizeInfoToFile() for details on the .size file format.

  Args:
    file_obj: File to read, should be a GzipFile.
  """
  # Split lines on '\n', since '\r' can appear in some lines!
  lines = io.TextIOWrapper(file_obj, newline='\n')
  _ReadLine(lines)  # Line 0: "Created by supersize" header.
  actual_version = _ReadLine(lines)
  assert actual_version == _SERIALIZATION_VERSION, (
      'Version mismatch. Need to write some upgrade code.')

  # JSON header fields.
  json_len = int(_ReadLine(lines))
  json_str = lines.read(json_len)
  fields = json.loads(json_str)

  has_multi_containers = False
  containers = []
  if has_multi_containers:
    # New format.
    raise ValueError('Multiple containers not yet supported.')
  else:
    # Parse old format, but separate data into build_config and metadata.
    build_config = {}
    metadata = fields.get('metadata')
    if metadata:
      for key in models.BUILD_CONFIG_KEYS:
        if key in metadata:
          build_config[key] = metadata[key]
          del metadata[key]
    section_sizes = fields['section_sizes']
    containers.append(
        models.Container(name='', metadata=metadata,
                         section_sizes=section_sizes))

  has_components = fields.get('has_components', False)
  has_padding = fields.get('has_padding', False)

  # Eat empty line.
  _ReadLine(lines)

  # Path list.
  num_path_tuples = int(_ReadLine(lines))  # Number of paths in list.
  # Read the path list values and store for later.
  path_tuples = [
      _ReadValuesFromLine(lines, split='\t') for _ in range(num_path_tuples)
  ]

  # Component list.
  if has_components:
    num_components = int(_ReadLine(lines))  # Number of components in list.
    components = [_ReadLine(lines) for _ in range(num_components)]

  # Symbol counts by section.
  section_names = _ReadValuesFromLine(lines, split='\t')
  symbol_counts = [int(c) for c in _ReadValuesFromLine(lines, split='\t')]

  # Addresses, sizes, paddings, path indices, component indices.
  def read_numeric(delta=False):
    """Reads numeric values, where each line corresponds to a symbol group.

    The values in each line are space separated. If |delta| is True, each
    number is read as a value to add to the sum of the prior values in the
    line, i.e. as the amount to change by.
    """
    ret = []
    delta_multiplier = int(delta)
    for _ in symbol_counts:
      value = 0
      fields = []
      for f in _ReadValuesFromLine(lines, split=' '):
        value = value * delta_multiplier + int(f)
        fields.append(value)
      ret.append(fields)
    return ret

  addresses = read_numeric(delta=True)
  sizes = read_numeric(delta=False)
  if has_padding:
    paddings = read_numeric(delta=False)
  else:
    paddings = [None] * len(section_names)
  path_indices = read_numeric(delta=True)
  if has_components:
    component_indices = read_numeric(delta=True)
  else:
    component_indices = [None] * len(section_names)

  raw_symbols = [None] * sum(symbol_counts)
  symbol_idx = 0
  for (cur_section_name, cur_symbol_count, cur_addresses, cur_sizes,
       cur_paddings, cur_path_indices,
       cur_component_indices) in zip(section_names, symbol_counts, addresses,
                                     sizes, paddings, path_indices,
                                     component_indices):
    if has_multi_containers:
      raise ValueError('Multiple containers not yet supported.')
    else:
      cur_container = containers[0]

    alias_counter = 0
    for i in range(cur_symbol_count):
      parts = _ReadValuesFromLine(lines, split='\t')
      full_name = parts[0]
      flags_part = None
      aliases_part = None
      # aliases_part or flags_part may have been omitted.
      if len(parts) == 3:
        # full_name  aliases_part  flags_part
        aliases_part = parts[1]
        flags_part = parts[2]
      elif len(parts) == 2:
        if parts[1][0] == '0':
          # full_name  aliases_part
          aliases_part = parts[1]
        else:
          # full_name  flags_part
          flags_part = parts[1]

      # Use a bit less RAM by using the same instance for this common string.
      if full_name == models.STRING_LITERAL_NAME:
        full_name = models.STRING_LITERAL_NAME

      flags = int(flags_part, 16) if flags_part else 0
      num_aliases = int(aliases_part, 16) if aliases_part else 0

      # Skip the constructor to avoid default value checks.
      new_sym = models.Symbol.__new__(models.Symbol)
      new_sym.container = cur_container
      new_sym.section_name = cur_section_name
      new_sym.full_name = full_name
      new_sym.address = cur_addresses[i]
      new_sym.size = cur_sizes[i]
      paths = path_tuples[cur_path_indices[i]]
      new_sym.object_path, new_sym.source_path = paths
      component = components[cur_component_indices[i]] if has_components else ''
      new_sym.component = component
      new_sym.flags = flags
      # Derived.
      if cur_paddings:
        new_sym.padding = cur_paddings[i]
        new_sym.size += new_sym.padding
      else:
        # This will be computed during CreateSizeInfo().
        new_sym.padding = 0
      new_sym.template_name = ''
      new_sym.name = ''

      if num_aliases:
        assert alias_counter == 0
        new_sym.aliases = [new_sym]
        alias_counter = num_aliases - 1
      elif alias_counter > 0:
        new_sym.aliases = raw_symbols[symbol_idx - 1].aliases
        new_sym.aliases.append(new_sym)
        alias_counter -= 1
      else:
        new_sym.aliases = None

      raw_symbols[symbol_idx] = new_sym
      symbol_idx += 1

  if not has_padding:
    CalculatePadding(raw_symbols)

  return models.SizeInfo(build_config, containers, raw_symbols,
                         size_path=size_path)

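# A sketch (hypothetical helper, mirroring the parsing above) of how the two
# optional tab-separated fields are told apart when only one is present: the
# loader relies on alias counts being serialized with a leading '0', while a
# bare hex flags value never starts with '0' (zero flags are omitted), so
# parts[1][0] == '0' can only mean an alias count.
def _ParseOptionalParts(parts):
  aliases_part = None
  flags_part = None
  if len(parts) == 3:    # full_name, aliases, flags.
    aliases_part, flags_part = parts[1], parts[2]
  elif len(parts) == 2:  # Only one of the two was written.
    if parts[1][0] == '0':
      aliases_part = parts[1]
    else:
      flags_part = parts[1]
  num_aliases = int(aliases_part, 16) if aliases_part else 0
  flags = int(flags_part, 16) if flags_part else 0
  return num_aliases, flags
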
def CreateSizeInfo(map_path, elf_path, tool_prefix, output_directory,
                   normalize_names=True, track_string_literals=True):
  """Creates a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find
        symbol aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
    normalize_names: Whether to normalize symbol names.
    track_string_literals: Whether to break down "** merge string" sections
        into smaller symbols (requires output_directory).
  """
  source_mapper = None
  if output_directory:
    # Start by finding the elf_object_paths, so that nm can run on them while
    # the linker .map is being parsed.
    logging.info('Parsing ninja files.')
    source_mapper, elf_object_paths = ninja_parser.Parse(
        output_directory, elf_path)
    logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
    assert not elf_path or elf_object_paths, (
        'Failed to find link command in ninja files for ' +
        os.path.relpath(elf_path, output_directory))

  if elf_path:
    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(elf_path, tool_prefix)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when multiple .o files contain the same
    # symbol (e.g. inline functions), and to update the object_path /
    # source_path fields accordingly.
    # Looking in object files is required because the .map file chooses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if output_directory:
      bulk_analyzer = nm.BulkObjectFileAnalyzer(tool_prefix, output_directory)
      bulk_analyzer.AnalyzePaths(elf_object_paths)

  logging.info('Parsing Linker Map')
  with _OpenMaybeGz(map_path) as map_file:
    section_sizes, raw_symbols = (
        linker_map_parser.MapFileParser().Parse(map_file))

  if elf_path:
    logging.debug('Validating section sizes')
    elf_section_sizes = _SectionSizesFromElf(elf_path, tool_prefix)
    for k, v in elf_section_sizes.iteritems():
      if v != section_sizes.get(k):
        logging.error('ELF file and .map file do not agree on section sizes.')
        logging.error('.map file: %r', section_sizes)
        logging.error('readelf: %r', elf_section_sizes)
        sys.exit(1)

  if elf_path and output_directory:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, elf_object_paths)
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.SortPaths()

    if track_string_literals:
      merge_string_syms = [
          s for s in raw_symbols
          if s.full_name == '** merge strings' or
          s.full_name == '** lld merge strings'
      ]
      # More likely for there to be a bug in supersize than for an ELF to not
      # contain a single string literal.
      assert merge_string_syms
      string_positions = [(s.address, s.size) for s in merge_string_syms]
      bulk_analyzer.AnalyzeStringLiterals(elf_path, string_positions)

  logging.info('Stripping linker prefixes from symbol names')
  _StripLinkerAddedSymbolPrefixes(raw_symbols)
  # Map file for some reason doesn't unmangle all names.
  # Unmangle prints its own log statement.
  _UnmangleRemainingSymbols(raw_symbols, tool_prefix)

  if elf_path:
    logging.info(
        'Adding symbols removed by identical code folding (as reported by nm)')
    # This normally does not block (it's finished by this time).
    names_by_address = elf_nm_result.get()
    _AddNmAliases(raw_symbols, names_by_address)

    if output_directory:
      object_paths_by_name = bulk_analyzer.GetSymbolNames()
      logging.debug('Fetched path information for %d symbols from %d files',
                    len(object_paths_by_name),
                    len(elf_object_paths) + len(missed_object_paths))

      # For aliases, this provides path information where there wasn't any.
      logging.info('Creating aliases for symbols shared by multiple paths')
      raw_symbols = _AssignNmAliasPathsAndCreatePathAliases(
          raw_symbols, object_paths_by_name)

      if track_string_literals:
        logging.info('Waiting for string literal extraction to complete.')
        list_of_positions_by_object_path = bulk_analyzer.GetStringPositions()
      bulk_analyzer.Close()

      if track_string_literals:
        logging.info('Deconstructing ** merge strings into literals')
        replacements = _CreateMergeStringsReplacements(
            merge_string_syms, list_of_positions_by_object_path)
        for merge_sym, literal_syms in itertools.izip(merge_string_syms,
                                                      replacements):
          # Don't replace if no literals were found.
          if literal_syms:
            # Re-find the symbols since aliases cause their indices to change.
            idx = raw_symbols.index(merge_sym)
            # This assignment is a bit slow (causes array to be shifted), but
            # is fast enough since len(merge_string_syms) < 10.
            raw_symbols[idx:idx + 1] = literal_syms

  _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper)
  logging.info('Converting excessive aliases into shared-path symbols')
  _CompactLargeAliasesIntoSharedSymbols(raw_symbols)
  logging.debug('Connecting nm aliases')
  _ConnectNmAliases(raw_symbols)

  # Padding is not strictly required, but it is useful to check for large
  # padding and log a warning.
  logging.info('Calculating padding')
  _CalculatePadding(raw_symbols)

  # Do not call _NormalizeNames() during archive since that method tends to
  # need tweaks over time. Calling it only when loading .size files allows for
  # more flexibility.
  if normalize_names:
    _NormalizeNames(raw_symbols)

  logging.info('Processed %d symbols', len(raw_symbols))
  size_info = models.SizeInfo(section_sizes, raw_symbols)

  if logging.getLogger().isEnabledFor(logging.INFO):
    for line in describe.DescribeSizeInfoCoverage(size_info):
      logging.info(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  return size_info

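# A hypothetical invocation of CreateSizeInfo() above, to show the expected
# arguments (all paths invented for illustration):
#
#   size_info = CreateSizeInfo(
#       map_path='lib.unstripped/libchrome.so.map.gz',
#       elf_path='lib.unstripped/libchrome.so',
#       tool_prefix='arm-linux-androideabi-',
#       output_directory='out/Release',
#       normalize_names=False,  # Names get normalized when loading .size.
#       track_string_literals=True)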