def testEncodeDictOfLists_Join_Empty(self):
  test_dict1 = {}
  test_dict2 = {}
  expected = {}
  encoded1 = concurrent.EncodeDictOfLists(test_dict1)
  encoded2 = concurrent.EncodeDictOfLists(test_dict2)
  encoded = concurrent.JoinEncodedDictOfLists([encoded1, encoded2])
  decoded = concurrent.DecodeDictOfLists(encoded)
  self.assertEquals(expected, decoded)
def testEncodeDictOfLists_JoinMultiple(self):
  test_dict1 = {'key1': ['a']}
  test_dict2 = {'key2': ['b']}
  expected = {'key1': ['a'], 'key2': ['b']}
  encoded1 = concurrent.EncodeDictOfLists(test_dict1)
  encoded2 = concurrent.EncodeDictOfLists({})
  encoded3 = concurrent.EncodeDictOfLists(test_dict2)
  encoded = concurrent.JoinEncodedDictOfLists([encoded1, encoded2, encoded3])
  decoded = concurrent.DecodeDictOfLists(encoded)
  self.assertEquals(expected, decoded)
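# NOTE: A minimal usage sketch (not from the real code; _Worker is a
# hypothetical stand-in) of the pattern the two Join tests above exercise:
# worker processes each encode their partial dict, and the parent merges the
# encoded forms via JoinEncodedDictOfLists() without decoding in between.
def _Worker(paths):
  return concurrent.EncodeDictOfLists({p: ['sym_' + p] for p in paths})

def _JoinWorkerResults():
  encoded = concurrent.JoinEncodedDictOfLists(
      [_Worker(['a.o']), _Worker(['b.o'])])
  return concurrent.DecodeDictOfLists(encoded)
  # => {'a.o': ['sym_a.o'], 'b.o': ['sym_b.o']}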
def RunNmOnIntermediates(target, tool_prefix, output_directory):
  """Returns encoded_symbol_names_by_path, encoded_string_addresses_by_path,
  num_no_symbols.

  Args:
    target: Either a single path to a .a (as a string), or a list of .o paths.
  """
  is_archive = isinstance(target, basestring)
  args = [path_util.GetNmPath(tool_prefix), '--no-sort', '--defined-only']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  proc = subprocess.Popen(args, cwd=output_directory, stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE)
  # llvm-nm can print 'no symbols' to stderr. Capture and count the number of
  # lines, to be returned to the caller.
  stdout, stderr = proc.communicate()
  assert proc.returncode == 0
  num_no_symbols = len(stderr.splitlines())
  lines = stdout.splitlines()
  # An empty .a file has no output.
  if not lines:
    return (concurrent.EMPTY_ENCODED_DICT, concurrent.EMPTY_ENCODED_DICT,
            num_no_symbols)
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    assert not is_archive
    path = target[0]

  symbol_names_by_path = {}
  string_addresses_by_path = {}
  while path:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    mangled_symbol_names, string_addresses = _ParseOneObjectFileNmOutput(lines)
    symbol_names_by_path[path] = mangled_symbol_names
    if string_addresses:
      string_addresses_by_path[path] = string_addresses
    path = next(lines, ':')[:-1]

  # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
  # faster to use join & split.
  # TODO(agrieve): We could use path indices as keys rather than paths to cut
  #   down on marshalling overhead.
  return (concurrent.EncodeDictOfLists(symbol_names_by_path),
          concurrent.EncodeDictOfLists(string_addresses_by_path),
          num_no_symbols)
def _RunNmOnIntermediates(target, tool_prefix, output_directory):
  """Returns encoded_symbol_names_by_path, encoded_string_addresses_by_path.

  Args:
    target: Either a single path to a .a (as a string), or a list of .o paths.
  """
  is_archive = isinstance(target, basestring)
  args = [
      path_util.GetNmPath(tool_prefix), '--no-sort', '--defined-only',
      '--demangle'
  ]
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  output = subprocess.check_output(args, cwd=output_directory)
  lines = output.splitlines()
  # An empty .a file has no output.
  if not lines:
    return concurrent.EMPTY_ENCODED_DICT, concurrent.EMPTY_ENCODED_DICT
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    assert not is_archive
    path = target[0]

  string_addresses_by_path = {}
  symbol_names_by_path = {}
  while path:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    string_addresses, symbol_names = _ParseOneObjectFileNmOutput(lines)
    symbol_names_by_path[path] = symbol_names
    if string_addresses:
      string_addresses_by_path[path] = string_addresses
    path = next(lines, ':')[:-1]

  # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
  # faster to use join & split.
  # TODO(agrieve): We could use path indices as keys rather than paths to cut
  #   down on marshalling overhead.
  return (concurrent.EncodeDictOfLists(symbol_names_by_path),
          concurrent.EncodeDictOfLists(string_addresses_by_path))
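# NOTE: A hypothetical micro-benchmark sketch (not part of the real code)
# behind the "pickle is ridiculously slow" comment above: round-tripping a
# dict-of-lists through EncodeDictOfLists()/DecodeDictOfLists() vs. pickle.
# Data shapes and counts are made up for illustration.
import cPickle
import timeit

def _CompareMarshallingCost():
  d = {'obj%d.o' % i: ['sym%d' % j for j in xrange(20)] for i in xrange(500)}
  pickle_secs = timeit.timeit(
      lambda: cPickle.loads(cPickle.dumps(d, cPickle.HIGHEST_PROTOCOL)),
      number=100)
  encode_secs = timeit.timeit(
      lambda: concurrent.DecodeDictOfLists(concurrent.EncodeDictOfLists(d)),
      number=100)
  return pickle_secs, encode_secs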
def _HandleMessage(self, message):
  if message[0] == _MSG_ANALYZE_PATHS:
    assert self._allow_analyze_paths, (
        'Cannot call AnalyzePaths() after AnalyzeStringLiterals().')
    # Invert '\x01'.join(paths), favoring paths = [] over paths = [''] since
    # the latter is less likely to happen.
    paths = message[1].split('\x01') if message[1] else []
    self._job_queue.put(lambda: self._worker_analyzer.AnalyzePaths(paths))
  elif message[0] == _MSG_SORT_PATHS:
    assert self._allow_analyze_paths, (
        'Cannot call SortPaths() after AnalyzeStringLiterals().')
    self._job_queue.put(self._worker_analyzer.SortPaths)
  elif message[0] == _MSG_ANALYZE_STRINGS:
    self._WaitForAnalyzePathJobs()
    elf_path, string_positions = message[1:]
    self._job_queue.put(
        lambda: self._worker_analyzer.AnalyzeStringLiterals(
            elf_path, string_positions))
  elif message[0] == _MSG_GET_SYMBOL_NAMES:
    self._WaitForAnalyzePathJobs()
    # Send a None packet so that the other side can measure IPC transfer time.
    self._pipe.send(None)
    paths_by_name = self._worker_analyzer.GetSymbolNames()
    self._pipe.send(concurrent.EncodeDictOfLists(paths_by_name))
  elif message[0] == _MSG_GET_STRINGS:
    self._job_queue.join()
    # Send a None packet so that the other side can measure IPC transfer time.
    self._pipe.send(None)
    self._pipe.send(self._worker_analyzer.GetEncodedStringPositions())
def _SubMain(log_level, tool_prefix, output_directory):
  logging.basicConfig(
      level=int(log_level),
      format='nm: %(levelname).1s %(relativeCreated)6d %(message)s')
  bulk_analyzer = _BulkObjectFileAnalyzerWorker(tool_prefix, output_directory)
  while True:
    payload_len = int(sys.stdin.read(8) or '0', 16)
    if not payload_len:
      logging.debug('nm bulk subprocess received eof.')
      break
    paths = sys.stdin.read(payload_len).split('\x01')
    bulk_analyzer.AnalyzePaths(paths)

  bulk_analyzer.Close()
  paths_by_name = bulk_analyzer.Get()
  encoded_keys, encoded_values = concurrent.EncodeDictOfLists(paths_by_name)
  try:
    sys.stdout.write('%08x' % len(encoded_keys))
    sys.stdout.write(encoded_keys)
    sys.stdout.write(encoded_values)
  except IOError, e:
    # Parent process exited.
    if e.errno == errno.EPIPE:
      sys.exit(1)
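# NOTE: A sketch of the parent-side half of _SubMain()'s stdin/stdout protocol
# (hypothetical helpers; the real parent class is not shown here). Each
# request is an 8-hex-digit length prefix followed by '\x01'-joined paths.
# Closing stdin makes sys.stdin.read(8) return '', so payload_len falls back
# to 0 and the loop breaks; the subprocess then writes back
# len(encoded_keys) as 8 hex digits, encoded_keys, and encoded_values.
def _SendAnalyzeRequest(proc, paths):
  # |proc| is assumed to be a subprocess.Popen with stdin/stdout pipes.
  payload = '\x01'.join(paths)
  proc.stdin.write('%08x' % len(payload))
  proc.stdin.write(payload)

def _ReadFinalResult(proc):
  proc.stdin.close()  # Causes the read loop in _SubMain() to break.
  keys_len = int(proc.stdout.read(8), 16)
  encoded_keys = proc.stdout.read(keys_len)
  encoded_values = proc.stdout.read()  # Remainder, until the process exits.
  return encoded_keys, encoded_values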
def ResolveStringPiecesIndirect(encoded_string_addresses_by_path, string_data,
                                tool_prefix, output_directory):
  string_addresses_by_path = concurrent.DecodeDictOfLists(
      encoded_string_addresses_by_path)
  # Assign |target| as archive path, or a list of object paths.
  any_path = next(string_addresses_by_path.iterkeys())
  target = _ExtractArchivePath(any_path)
  if not target:
    target = string_addresses_by_path.keys()

  # Run readelf to find location of .rodata within the .o files.
  section_positions_by_path = _LookupStringSectionPositions(
      target, tool_prefix, output_directory)
  # Load the .rodata sections (from object files) as strings.
  string_sections_by_path = _ReadStringSections(
      target, output_directory, section_positions_by_path)

  def GeneratePathAndValues():
    for path, object_addresses in string_addresses_by_path.iteritems():
      for value in _IterStringLiterals(
          path, object_addresses, string_sections_by_path.get(path)):
        yield path, value

  ret = _AnnotateStringData(string_data, GeneratePathAndValues())
  return [concurrent.EncodeDictOfLists(x) for x in ret]
def ResolveStringPieces(encoded_string_addresses_by_path, string_data,
                        tool_prefix, output_directory):
  string_addresses_by_path = concurrent.DecodeDictOfLists(
      encoded_string_addresses_by_path)
  # Assign |target| as archive path, or a list of object paths.
  any_path = next(string_addresses_by_path.iterkeys())
  target = _ExtractArchivePath(any_path)
  if not target:
    target = string_addresses_by_path.keys()

  # Run readelf to find location of .rodata within the .o files.
  section_positions_by_path = _LookupStringSectionPositions(
      target, tool_prefix, output_directory)
  # Load the .rodata sections (from object files) as strings.
  string_sections_by_path = _ReadStringSections(
      target, output_directory, section_positions_by_path)

  # list of elf_positions_by_path.
  ret = [collections.defaultdict(list) for _ in string_data]
  # Brute-force search of strings within ** merge strings sections.
  # This is by far the slowest part of AnalyzeStringLiterals().
  # TODO(agrieve): Pre-process string_data into a dict of literal->address (at
  #   least for ascii strings).
  for path, object_addresses in string_addresses_by_path.iteritems():
    for value in _IterStringLiterals(
        path, object_addresses, string_sections_by_path.get(path)):
      first_match = -1
      first_match_dict = None
      for target_dict, data in itertools.izip(ret, string_data):
        # Set offset so that it will be 0 when len(value) is added to it below.
        offset = -len(value)
        while True:
          offset = data.find(value, offset + len(value))
          if offset == -1:
            break
          # Preferring exact matches (those following \0) over substring
          # matches significantly increases accuracy (although it shows that
          # the linker isn't being optimal).
          if offset == 0 or data[offset - 1] == '\0':
            break
          if first_match == -1:
            first_match = offset
            first_match_dict = target_dict
        if offset != -1:
          break
      if offset == -1:
        # Exact match not found, so take the suffix match if one exists.
        offset = first_match
        target_dict = first_match_dict
      # Missing strings happen when optimizations make them unused.
      if offset != -1:
        # Encode the tuple as a string for easier marshalling.
        target_dict[path].append(str(offset) + ':' + str(len(value)))
  return [concurrent.EncodeDictOfLists(x) for x in ret]
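# NOTE: A tiny worked example (made-up data) of the matching rule above.
# Literals in the merged section are NUL-terminated, so a hit preceded by
# '\0' (or at offset 0) marks a true string start, while other hits are just
# substrings (e.g. suffixes kept by linker tail-merging).
#   data = 'xfoo\x00foo\x00', value = 'foo'
#   data.find('foo', 0) -> offset 1, but data[0] == 'x': only a substring
#     match, so it is remembered in first_match and the scan continues.
#   data.find('foo', 4) -> offset 5, which follows '\x00': an exact match, so
#     the loop breaks and '5:3' (offset:length) is recorded for the path.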
def ResolveStringPieces(encoded_strings_by_path, string_data):
  # ast.literal_eval() undoes repr() applied to strings.
  strings_by_path = concurrent.DecodeDictOfLists(
      encoded_strings_by_path, value_transform=ast.literal_eval)

  def GeneratePathAndValues():
    for path, strings in strings_by_path.iteritems():
      for value in strings:
        yield path, value

  ret = _AnnotateStringData(string_data, GeneratePathAndValues())
  return [concurrent.EncodeDictOfLists(x) for x in ret]
def RunBcAnalyzerOnIntermediates(target, tool_prefix, output_directory):
  """Calls bcanalyzer and returns encoded map from path to strings.

  Args:
    target: A list of BC file paths.
  """
  assert isinstance(target, list)
  runner = _BcAnalyzerRunner(tool_prefix, output_directory)
  strings_by_path = {}
  for t in target:
    strings_by_path[t] = [s for _, s in _ParseBcAnalyzer(runner.RunOnFile(t))]
  # Escape strings with repr() so that no special characters interfere with
  # concurrent.EncodeDictOfLists() and decoding.
  return concurrent.EncodeDictOfLists(strings_by_path, value_transform=repr)
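# NOTE: A minimal sketch (not from the real code) of why value_transform=repr
# here is paired with value_transform=ast.literal_eval in ResolveStringPieces()
# above: repr() escapes control bytes (assumed here to include the separator
# characters EncodeDictOfLists() relies on), and literal_eval() undoes it
# exactly.
import ast

def _CheckReprRoundTrip():
  raw = 'contains\x01separator\nand newline'
  escaped = repr(raw)  # "'contains\\x01separator\\nand newline'"
  assert '\x01' not in escaped and '\n' not in escaped
  assert ast.literal_eval(escaped) == raw  # Lossless round trip.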
def Run(self):
  try:
    self._worker_thread.start()
    while True:
      message = self._pipe.recv()
      if message[0] == _MSG_ANALYZE_PATHS:
        assert self._allow_analyze_paths, (
            'Cannot call AnalyzePaths() after AnalyzeStringLiterals().')
        paths = message[1].split('\x01')
        self._job_queue.put(lambda: self._worker_analyzer.AnalyzePaths(paths))
      elif message[0] == _MSG_SORT_PATHS:
        assert self._allow_analyze_paths, (
            'Cannot call SortPaths() after AnalyzeStringLiterals().')
        self._job_queue.put(self._worker_analyzer.SortPaths)
      elif message[0] == _MSG_ANALYZE_STRINGS:
        self._WaitForAnalyzePathJobs()
        elf_path, string_positions = message[1:]
        self._job_queue.put(
            lambda: self._worker_analyzer.AnalyzeStringLiterals(
                elf_path, string_positions))
      elif message[0] == _MSG_GET_SYMBOL_NAMES:
        self._WaitForAnalyzePathJobs()
        # Send a None packet so that the other side can measure IPC transfer
        # time.
        self._pipe.send(None)
        paths_by_name = self._worker_analyzer.GetSymbolNames()
        self._pipe.send(concurrent.EncodeDictOfLists(paths_by_name))
      elif message[0] == _MSG_GET_STRINGS:
        self._job_queue.join()
        # Send a None packet so that the other side can measure IPC transfer
        # time.
        self._pipe.send(None)
        self._pipe.send(self._worker_analyzer.GetEncodedStringPositions())
  except EOFError:
    pass
  except EnvironmentError, e:
    # The parent process exited, so don't log.
    if e.errno in (errno.EPIPE, errno.ECONNRESET):
      sys.exit(1)
def _BatchCollectNames(target, tool_prefix, output_directory):
  is_archive = isinstance(target, basestring)
  # Ensure tool_prefix is absolute so that CWD does not affect it.
  if os.path.sep in tool_prefix:
    # Use abspath() on the dirname to avoid it stripping a trailing /.
    dirname = os.path.dirname(tool_prefix)
    tool_prefix = os.path.abspath(dirname) + tool_prefix[len(dirname):]

  args = [tool_prefix + 'nm', '--no-sort', '--defined-only', '--demangle']
  if is_archive:
    args.append(target)
  else:
    args.extend(target)
  output = subprocess.check_output(args, cwd=output_directory)
  lines = output.splitlines()
  if not lines:
    return '', ''
  is_multi_file = not lines[0]
  lines = iter(lines)
  if is_multi_file:
    next(lines)
    path = next(lines)[:-1]  # Path ends with a colon.
  else:
    assert not is_archive
    path = target[0]

  ret = {}
  while True:
    if is_archive:
      # E.g. foo/bar.a(baz.o)
      path = '%s(%s)' % (target, path)
    # The multiprocess API uses pickle, which is ridiculously slow. More than
    # 2x faster to use join & split.
    ret[path] = _ParseOneObjectFileOutput(lines)
    path = next(lines, ':')[:-1]
    if not path:
      return concurrent.EncodeDictOfLists(ret)
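# NOTE: A tiny worked example (hypothetical values) of the abspath() dance in
# _BatchCollectNames() above: os.path.abspath('tools/bin/') would strip the
# trailing separator and break the later tool_prefix + 'nm' concatenation, so
# only the dirname is made absolute and the original tail is re-attached.
#   tool_prefix = 'tools/bin/'
#   dirname = os.path.dirname(tool_prefix)                 # 'tools/bin'
#   os.path.abspath(dirname) + tool_prefix[len(dirname):]
#       -> e.g. '/src/tools/bin/' (trailing '/' preserved, so appending 'nm'
#          still yields '/src/tools/bin/nm')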
def testEncodeDictOfLists_EmptyValue(self):
  test_dict = {'foo': []}
  encoded = concurrent.EncodeDictOfLists(test_dict)
  decoded = concurrent.DecodeDictOfLists(encoded)
  self.assertEquals(test_dict, decoded)
def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
  result = CollectAliasesByAddress(elf_path, tool_prefix)
  return concurrent.EncodeDictOfLists(result, key_transform=str)
def testEncodeDictOfLists_Join_Single(self):
  test_dict1 = {'key1': ['a']}
  encoded1 = concurrent.EncodeDictOfLists(test_dict1)
  encoded = concurrent.JoinEncodedDictOfLists([encoded1])
  decoded = concurrent.DecodeDictOfLists(encoded)
  self.assertEquals(test_dict1, decoded)
def testEncodeDictOfLists_AllStrings(self):
  test_dict = {'foo': ['a', 'b', 'c'], 'foo2': ['a', 'b']}
  encoded = concurrent.EncodeDictOfLists(test_dict)
  decoded = concurrent.DecodeDictOfLists(encoded)
  self.assertEquals(test_dict, decoded)
def testEncodeDictOfLists_ValueTransform(self):
  test_dict = {'a': ['0', '1', '2'], 'b': ['3', '4']}
  expected = {'a': [0, 1, 2], 'b': [3, 4]}
  encoded = concurrent.EncodeDictOfLists(test_dict)
  decoded = concurrent.DecodeDictOfLists(encoded, value_transform=int)
  self.assertEquals(expected, decoded)
def testEncodeDictOfLists_KeyTransform(self):
  test_dict = {0: ['a', 'b', 'c'], 9: ['a', 'b']}
  encoded = concurrent.EncodeDictOfLists(test_dict, key_transform=str)
  decoded = concurrent.DecodeDictOfLists(encoded, key_transform=int)
  self.assertEquals(test_dict, decoded)