def AnalyzeStringLiterals(self, elf_path, elf_string_positions):
  """Resolves string literal positions to paths across all path chunks.

  Args:
    elf_path: Path to the ELF file to read string data from.
    elf_string_positions: Iterable of (virtual address, size) pairs for the
        string regions to analyze.

  Side effects:
    Sets self._list_of_encoded_elf_string_positions_by_path to a list with
    one merged encoded dict per entry in |elf_string_positions|.
  """
  logging.debug('worker: AnalyzeStringLiterals() started.')
  # Read string_data from elf_path, to be shared by forked processes.
  # Delegate to _ReadElfStringData() rather than duplicating its
  # address-to-file-offset adjustment logic here.
  string_data = self._ReadElfStringData(elf_path, elf_string_positions)

  params = ((chunk,)
            for chunk in self._encoded_string_addresses_by_path_chunks)
  # Order of the jobs doesn't matter since each job owns independent paths,
  # and our output is a dict where paths are the key.
  results = concurrent.BulkForkAndCall(
      string_extract.ResolveStringPieces, params, string_data=string_data,
      tool_prefix=self._tool_prefix, output_directory=self._output_directory)
  results = list(results)

  # Merge the per-job results so there is one combined dict per input range.
  final_result = []
  for i in xrange(len(elf_string_positions)):
    final_result.append(
        concurrent.JoinEncodedDictOfLists([r[i] for r in results]))
  self._list_of_encoded_elf_string_positions_by_path = final_result
  logging.debug('worker: AnalyzeStringLiterals() completed.')
def _ReadElfStringData(self, elf_path, elf_string_ranges):
  """Reads raw string bytes out of an ELF file for the given ranges.

  The ranges are given as virtual addresses; the .rodata section's
  address/offset delta is used to translate them into file offsets before
  reading. The result is intended to be shared with forked processes.

  Args:
    elf_path: Path to the ELF file to read from.
    elf_string_ranges: Iterable of (virtual address, size) pairs.

  Returns:
    Whatever string_extract.ReadFileChunks() returns for the adjusted ranges.
  """
  rodata_address, rodata_offset, _ = string_extract.LookupElfRodataInfo(
      elf_path, self._tool_prefix)
  delta = rodata_address - rodata_offset
  file_ranges = ((virt_addr - delta, size)
                 for virt_addr, size in elf_string_ranges)
  return string_extract.ReadFileChunks(elf_path, file_ranges)
def main():
  """CLI entry point for exercising the bulk object-file analyzers."""
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument('--multiprocess', action='store_true')
  arg_parser.add_argument('--tool-prefix', required=True)
  arg_parser.add_argument('--output-directory', required=True)
  arg_parser.add_argument('--elf-file', type=os.path.realpath)
  arg_parser.add_argument('--show-names', action='store_true')
  arg_parser.add_argument('--show-strings', action='store_true')
  arg_parser.add_argument('objects', type=os.path.realpath, nargs='+')
  args = arg_parser.parse_args()

  logging.basicConfig(
      level=logging.DEBUG,
      format='%(levelname).1s %(relativeCreated)6d %(message)s')

  # Choose between the forking master and the synchronous in-process worker.
  if args.multiprocess:
    analyzer = _BulkObjectFileAnalyzerMaster(args.tool_prefix,
                                             args.output_directory)
  else:
    concurrent.DISABLE_ASYNC = True
    analyzer = _BulkObjectFileAnalyzerWorker(args.tool_prefix,
                                             args.output_directory)

  # Pass individually to test multiple calls.
  for obj_path in args.objects:
    analyzer.AnalyzePaths([obj_path])
  analyzer.SortPaths()

  names_to_paths = analyzer.GetSymbolNames()
  print('Found {} names'.format(len(names_to_paths)))
  if args.show_names:
    for name, paths in names_to_paths.iteritems():
      print('{}: {!r}'.format(name, paths))

  if args.elf_file:
    address, offset, size = string_extract.LookupElfRodataInfo(
        args.elf_file, args.tool_prefix)
    analyzer.AnalyzeStringLiterals(args.elf_file, ((address, size),))
    positions_by_path = analyzer.GetStringPositions()[0]
    print('Found {} string literals'.format(
        sum(len(v) for v in positions_by_path.itervalues())))
    if args.show_strings:
      logging.debug('.rodata adjust=%d', address - offset)
      for path, positions in positions_by_path.iteritems():
        chunks = string_extract.ReadFileChunks(
            args.elf_file,
            ((offset + addr, size) for addr, size in positions))
        # Truncate long strings so output stays readable.
        print('{}: {!r}'.format(
            path, [s if len(s) < 20 else s[:20] + '...' for s in chunks]))
def _ReadStringLiterals(self, thing=None, all_rodata=False, elf_path=None):
  """Returns a list of (symbol, string value) for all string literal symbols.

  E.g.:
    # Print sorted list of all string literals:
    Print(sorted(x[1] for x in ReadStringLiterals()))
  Args:
    thing: Can be a Symbol, iterable of symbols, or SizeInfo. Defaults to
        the current SizeInfo.
    all_rodata: Assume every symbol within .rodata that ends in a \0 is a
        string literal.
    elf_path: Path to the executable containing the symbol. Required only
        when auto-detection fails.
  """
  if thing is None:
    thing = self._size_infos[-1]
  # Normalize |thing| into a flat iterable of symbols.
  if isinstance(thing, models.SizeInfo):
    symbol_iter = thing.raw_symbols.IterUniqueSymbols()
  elif isinstance(thing, models.BaseSymbol):
    symbol_iter = thing.IterLeafSymbols()
  else:
    symbol_iter = thing

  # Peek at the first symbol (without consuming the iterator) to figure out
  # which SizeInfo / ELF the symbols belong to.
  symbol_iter, peek_iter = itertools.tee(symbol_iter)
  first_sym = next(peek_iter, None)
  if not first_sym:
    return []
  size_info = self._SizeInfoForSymbol(first_sym)
  tool_prefix = self._ToolPrefixForSymbol(size_info)
  elf_path = self._ElfPathForSymbol(size_info, tool_prefix, elf_path)

  rodata_address, rodata_offset, _ = string_extract.LookupElfRodataInfo(
      elf_path, tool_prefix)
  # Delta that converts a symbol's virtual address into a file offset.
  adjust = rodata_offset - rodata_address

  ret = []
  with open(elf_path, 'rb') as f:
    for symbol in symbol_iter:
      if symbol.section != 'r':
        continue
      if not all_rodata and not symbol.IsStringLiteral():
        continue
      f.seek(symbol.address + adjust)
      data = f.read(symbol.size_without_padding)
      # As of Oct 2017, there are ~90 symbols name .L.str(.##). These appear
      # in the linker map file explicitly, and there doesn't seem to be a
      # pattern as to which variables lose their kConstant name (the more
      # common case), or which string literals don't get moved to
      # ** merge strings (less common).
      looks_like_string = symbol.IsStringLiteral() or (
          all_rodata and data and data[-1] == '\0')
      if looks_like_string:
        ret.append((symbol, data))
  return ret