예제 #1
0
 def testEncodeDictOfLists_Join_Empty(self):
     """Joining two encoded empty dicts must decode to an empty dict."""
     encoded_parts = [
         concurrent.EncodeDictOfLists({}),
         concurrent.EncodeDictOfLists({}),
     ]
     encoded = concurrent.JoinEncodedDictOfLists(encoded_parts)
     decoded = concurrent.DecodeDictOfLists(encoded)
     # assertEqual() replaces the deprecated assertEquals() alias.
     self.assertEqual({}, decoded)
예제 #2
0
 def testEncodeDictOfLists_JoinMultiple(self):
   """Joining several encoded dicts (one of them empty) merges their keys."""
   expected = {'key1': ['a'], 'key2': ['b']}
   encoded_parts = [
       concurrent.EncodeDictOfLists({'key1': ['a']}),
       # An empty dict in the middle must not disturb its neighbours.
       concurrent.EncodeDictOfLists({}),
       concurrent.EncodeDictOfLists({'key2': ['b']}),
   ]
   encoded = concurrent.JoinEncodedDictOfLists(encoded_parts)
   decoded = concurrent.DecodeDictOfLists(encoded)
   # assertEqual() replaces the deprecated assertEquals() alias.
   self.assertEqual(expected, decoded)
예제 #3
0
def RunNmOnIntermediates(target, tool_prefix, output_directory):
    """Runs nm on |target| and returns its parsed output in encoded form.

    Args:
      target: Either a single path to a .a (as a string), or a list of .o
          paths.
      tool_prefix: Passed to path_util.GetNmPath() to locate the nm binary.
      output_directory: Working directory for the nm subprocess.

    Returns:
      A tuple of (encoded_symbol_names_by_path,
      encoded_string_addresses_by_path, num_no_symbols), where num_no_symbols
      is the number of lines that nm wrote to stderr. The tuple always has
      three elements, including when nm produced no output.
    """
    is_archive = isinstance(target, basestring)
    args = [path_util.GetNmPath(tool_prefix), '--no-sort', '--defined-only']
    if is_archive:
        args.append(target)
    else:
        args.extend(target)
    proc = subprocess.Popen(args,
                            cwd=output_directory,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    # llvm-nm can print 'no symbols' to stderr. Capture and count the number of
    # lines, to be returned to the caller.
    stdout, stderr = proc.communicate()
    assert proc.returncode == 0
    num_no_symbols = len(stderr.splitlines())
    lines = stdout.splitlines()
    # Empty .a file has no output. Keep the same 3-tuple shape as the normal
    # return so that callers can unpack the result unconditionally.
    if not lines:
        return (concurrent.EMPTY_ENCODED_DICT, concurrent.EMPTY_ENCODED_DICT,
                num_no_symbols)
    # Output for multiple files begins with an empty line, followed by a
    # "<path>:" header line.
    is_multi_file = not lines[0]
    lines = iter(lines)
    if is_multi_file:
        next(lines)
        path = next(lines)[:-1]  # Path ends with a colon.
    else:
        assert not is_archive
        path = target[0]

    symbol_names_by_path = {}
    string_addresses_by_path = {}
    while path:
        if is_archive:
            # E.g. foo/bar.a(baz.o)
            path = '%s(%s)' % (target, path)

        mangled_symbol_names, string_addresses = _ParseOneObjectFileNmOutput(
            lines)
        symbol_names_by_path[path] = mangled_symbol_names
        if string_addresses:
            string_addresses_by_path[path] = string_addresses
        # The ':' default yields an empty path once the lines are exhausted,
        # which terminates the loop.
        path = next(lines, ':')[:-1]

    # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
    # faster to use join & split.
    # TODO(agrieve): We could use path indices as keys rather than paths to cut
    #     down on marshalling overhead.
    return (concurrent.EncodeDictOfLists(symbol_names_by_path),
            concurrent.EncodeDictOfLists(string_addresses_by_path),
            num_no_symbols)
예제 #4
0
def _RunNmOnIntermediates(target, tool_prefix, output_directory):
    """Runs nm (with --demangle) on |target| and encodes the parsed output.

    Args:
      target: Either a single path to a .a (as a string), or a list of .o
          paths.
      tool_prefix: Passed to path_util.GetNmPath() to locate the nm binary.
      output_directory: Working directory for the nm subprocess.

    Returns:
      A tuple of (encoded_symbol_names_by_path,
      encoded_string_addresses_by_path).
    """
    is_archive = isinstance(target, basestring)
    nm_command = [
        path_util.GetNmPath(tool_prefix),
        '--no-sort',
        '--defined-only',
        '--demangle',
    ]
    nm_command.extend([target] if is_archive else target)
    nm_lines = subprocess.check_output(
        nm_command, cwd=output_directory).splitlines()
    if not nm_lines:
        # An empty .a file produces no output at all.
        return concurrent.EMPTY_ENCODED_DICT, concurrent.EMPTY_ENCODED_DICT

    # Output for multiple files begins with an empty line, followed by a
    # "<path>:" header line.
    starts_with_blank = not nm_lines[0]
    line_iter = iter(nm_lines)
    if starts_with_blank:
        next(line_iter)
        current_path = next(line_iter)[:-1]  # Strip the trailing colon.
    else:
        assert not is_archive
        current_path = target[0]

    names_by_path = {}
    addresses_by_path = {}
    while current_path:
        if is_archive:
            # E.g. foo/bar.a(baz.o)
            key = '%s(%s)' % (target, current_path)
        else:
            key = current_path

        addresses, names = _ParseOneObjectFileNmOutput(line_iter)
        names_by_path[key] = names
        if addresses:
            addresses_by_path[key] = addresses
        # Once the lines run out, ':' turns into an empty path and the loop
        # stops.
        current_path = next(line_iter, ':')[:-1]

    # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
    # faster to use join & split.
    # TODO(agrieve): We could use path indices as keys rather than paths to cut
    #     down on marshalling overhead.
    encoded_names = concurrent.EncodeDictOfLists(names_by_path)
    encoded_addresses = concurrent.EncodeDictOfLists(addresses_by_path)
    return encoded_names, encoded_addresses
예제 #5
0
 def _HandleMessage(self, message):
     """Acts on a single message, dispatching on its first element.

     Analysis requests are pushed onto the job queue; the two "get" requests
     reply over the pipe with a None packet followed by the encoded result.
     Messages of any other type are ignored.
     """
     kind = message[0]

     if kind == _MSG_ANALYZE_PATHS:
         assert self._allow_analyze_paths, (
             'Cannot call AnalyzePaths() after AnalyzeStringLiterals()s.')
         # Invert '\x01'.join(paths), favoring paths = [] over paths = [''] since
         # the latter is less likely to happen.
         joined_paths = message[1]
         paths = joined_paths.split('\x01') if joined_paths else []

         def analyze_paths_job():
             return self._worker_analyzer.AnalyzePaths(paths)

         self._job_queue.put(analyze_paths_job)
         return

     if kind == _MSG_SORT_PATHS:
         assert self._allow_analyze_paths, (
             'Cannot call SortPaths() after AnalyzeStringLiterals()s.')
         self._job_queue.put(self._worker_analyzer.SortPaths)
         return

     if kind == _MSG_ANALYZE_STRINGS:
         self._WaitForAnalyzePathJobs()
         elf_path, string_positions = message[1:]

         def analyze_strings_job():
             return self._worker_analyzer.AnalyzeStringLiterals(
                 elf_path, string_positions)

         self._job_queue.put(analyze_strings_job)
         return

     if kind == _MSG_GET_SYMBOL_NAMES:
         self._WaitForAnalyzePathJobs()
         self._pipe.send(None)
         encoded_names = concurrent.EncodeDictOfLists(
             self._worker_analyzer.GetSymbolNames())
         self._pipe.send(encoded_names)
         return

     if kind == _MSG_GET_STRINGS:
         self._job_queue.join()
         # Send a None packet so that other side can measure IPC transfer time.
         self._pipe.send(None)
         self._pipe.send(self._worker_analyzer.GetEncodedStringPositions())
예제 #6
0
def _SubMain(log_level, tool_prefix, output_directory):
    """Serves path-analysis requests over stdin and writes results to stdout.

    Input protocol: repeated records consisting of an 8-character hex length
    followed by that many characters of '\\x01'-joined paths. A zero length,
    or end of input, ends the request loop.

    Output protocol: the 8-character hex length of the encoded keys, followed
    by the encoded keys and then the encoded values.

    Args:
      log_level: Logging level; converted with int().
      tool_prefix: Forwarded to _BulkObjectFileAnalyzerWorker.
      output_directory: Forwarded to _BulkObjectFileAnalyzerWorker.
    """
    logging.basicConfig(
        level=int(log_level),
        format='nm: %(levelname).1s %(relativeCreated)6d %(message)s')
    bulk_analyzer = _BulkObjectFileAnalyzerWorker(tool_prefix,
                                                  output_directory)
    while True:
        # An empty read (EOF) is treated the same as a zero-length payload.
        payload_len = int(sys.stdin.read(8) or '0', 16)
        if not payload_len:
            logging.debug('nm bulk subprocess received eof.')
            break
        paths = sys.stdin.read(payload_len).split('\x01')
        bulk_analyzer.AnalyzePaths(paths)

    bulk_analyzer.Close()
    paths_by_name = bulk_analyzer.Get()
    encoded_keys, encoded_values = concurrent.EncodeDictOfLists(paths_by_name)
    try:
        sys.stdout.write('%08x' % len(encoded_keys))
        sys.stdout.write(encoded_keys)
        sys.stdout.write(encoded_values)
    except IOError as e:
        # Parent process exited.
        if e.errno == errno.EPIPE:
            sys.exit(1)
        # Any other I/O failure is unexpected; do not hide it.
        raise
예제 #7
0
def ResolveStringPiecesIndirect(encoded_string_addresses_by_path, string_data,
                                tool_prefix, output_directory):
    """Reads string literals from object files and annotates |string_data|.

    Args:
      encoded_string_addresses_by_path: Encoded dict of lists, as accepted by
          concurrent.DecodeDictOfLists(), keyed by path.
      string_data: Forwarded to _AnnotateStringData().
      tool_prefix: Forwarded to _LookupStringSectionPositions().
      output_directory: Forwarded to _LookupStringSectionPositions() and
          _ReadStringSections().

    Returns:
      A list holding concurrent.EncodeDictOfLists() applied to each item
      returned by _AnnotateStringData().
    """
    addresses_by_path = concurrent.DecodeDictOfLists(
        encoded_string_addresses_by_path)
    # |target| is the archive path when one can be extracted from a key,
    # otherwise the list of all keys (object paths).
    first_path = next(addresses_by_path.iterkeys())
    target = _ExtractArchivePath(first_path) or addresses_by_path.keys()

    # Run readelf to find location of .rodata within the .o files.
    positions_by_path = _LookupStringSectionPositions(
        target, tool_prefix, output_directory)
    # Load the .rodata sections (from object files) as strings.
    sections_by_path = _ReadStringSections(target, output_directory,
                                           positions_by_path)

    path_value_pairs = (
        (path, literal)
        for path, addresses in addresses_by_path.iteritems()
        for literal in _IterStringLiterals(path, addresses,
                                           sections_by_path.get(path)))

    annotated = _AnnotateStringData(string_data, path_value_pairs)
    return [concurrent.EncodeDictOfLists(item) for item in annotated]
예제 #8
0
def ResolveStringPieces(encoded_string_addresses_by_path, string_data,
                        tool_prefix, output_directory):
  """Locates object-file string literals within each entry of |string_data|.

  Args:
    encoded_string_addresses_by_path: Encoded dict of lists, as accepted by
        concurrent.DecodeDictOfLists(), keyed by path.
    string_data: Sequence of strings to search for the literals in.
    tool_prefix: Forwarded to _LookupStringSectionPositions().
    output_directory: Forwarded to _LookupStringSectionPositions() and
        _ReadStringSections().

  Returns:
    A list with one encoded dict per entry of |string_data|. Before encoding,
    each dict maps a path to a list of "offset:length" strings.
  """
  string_addresses_by_path = concurrent.DecodeDictOfLists(
      encoded_string_addresses_by_path)
  # Assign |target| as archive path, or a list of object paths.
  any_path = next(string_addresses_by_path.iterkeys())
  target = _ExtractArchivePath(any_path)
  if not target:
    target = string_addresses_by_path.keys()

  # Run readelf to find location of .rodata within the .o files.
  section_positions_by_path = _LookupStringSectionPositions(
      target, tool_prefix, output_directory)
  # Load the .rodata sections (from object files) as strings.
  string_sections_by_path = _ReadStringSections(
      target, output_directory, section_positions_by_path)

  # list of elf_positions_by_path.
  ret = [collections.defaultdict(list) for _ in string_data]
  # Brute-force search of strings within ** merge strings sections.
  # This is by far the slowest part of AnalyzeStringLiterals().
  # TODO(agrieve): Pre-process string_data into a dict of literal->address (at
  #     least for ascii strings).
  for path, object_addresses in string_addresses_by_path.iteritems():
    for value in _IterStringLiterals(
        path, object_addresses, string_sections_by_path.get(path)):
      first_match = -1
      first_match_dict = None
      # Pre-bind so that an empty |string_data| (zero loop iterations) reads as
      # "no match" instead of raising NameError below.
      offset = -1
      target_dict = None
      for target_dict, data in itertools.izip(ret, string_data):
        # Set offset so that it will be 0 when len(value) is added to it below.
        offset = -len(value)
        while True:
          offset = data.find(value, offset + len(value))
          if offset == -1:
            break
          # Preferring exact matches (those following \0) over substring matches
          # significantly increases accuracy (although shows that linker isn't
          # being optimal).
          if offset == 0 or data[offset - 1] == '\0':
            break
          if first_match == -1:
            first_match = offset
            first_match_dict = target_dict
        if offset != -1:
          break
      if offset == -1:
        # Exact match not found, so take suffix match if it exists.
        offset = first_match
        target_dict = first_match_dict
      # Missing strings happen when optimizations make them unused.
      if offset != -1:
        # Encode tuple as a string for easier marshalling.
        target_dict[path].append(
            str(offset) + ':' + str(len(value)))

  return [concurrent.EncodeDictOfLists(x) for x in ret]
예제 #9
0
def ResolveStringPieces(encoded_strings_by_path, string_data):
    """Annotates |string_data| with the strings recorded for each path.

    Args:
      encoded_strings_by_path: Encoded dict of lists whose values were
          escaped with repr(); keyed by path.
      string_data: Forwarded to _AnnotateStringData().

    Returns:
      A list holding concurrent.EncodeDictOfLists() applied to each item
      returned by _AnnotateStringData().
    """
    # ast.literal_eval() undoes repr() applied to strings.
    decoded_by_path = concurrent.DecodeDictOfLists(
        encoded_strings_by_path, value_transform=ast.literal_eval)

    path_value_pairs = (
        (path, value)
        for path, values in decoded_by_path.iteritems()
        for value in values)

    annotated = _AnnotateStringData(string_data, path_value_pairs)
    return [concurrent.EncodeDictOfLists(item) for item in annotated]
예제 #10
0
def RunBcAnalyzerOnIntermediates(target, tool_prefix, output_directory):
    """Calls bcanalyzer and returns encoded map from path to strings.

    Args:
      target: A list of BC file paths.
      tool_prefix: Forwarded to _BcAnalyzerRunner.
      output_directory: Forwarded to _BcAnalyzerRunner.

    Returns:
      concurrent.EncodeDictOfLists() of a dict mapping each path in |target|
      to its list of strings, with every string escaped by repr().
    """
    assert isinstance(target, list)
    runner = _BcAnalyzerRunner(tool_prefix, output_directory)

    def _StringsOf(bc_path):
        # Only the second element of each parsed pair is kept.
        parsed = _ParseBcAnalyzer(runner.RunOnFile(bc_path))
        return [text for _, text in parsed]

    strings_by_path = {bc_path: _StringsOf(bc_path) for bc_path in target}
    # Escape strings by repr() so there will be no special characters to interfere
    # concurrent.EncodeDictOfLists() and decoding.
    return concurrent.EncodeDictOfLists(strings_by_path, value_transform=repr)
예제 #11
0
 def Run(self):
     """Starts the worker thread and serves pipe messages until EOF.

     Analysis requests are pushed onto the job queue; the two "get" requests
     reply over the pipe with a None packet followed by the encoded result.
     EOF on the pipe ends the loop quietly. EPIPE / ECONNRESET exit the
     process with status 1; any other EnvironmentError is re-raised.
     """
     try:
         self._worker_thread.start()
         while True:
             message = self._pipe.recv()
             if message[0] == _MSG_ANALYZE_PATHS:
                 assert self._allow_analyze_paths, (
                     'Cannot call AnalyzePaths() after AnalyzeStringLiterals()s.'
                 )
                 # Invert '\x01'.join(paths), favoring paths = [] over
                 # paths = [''] for an empty payload.
                 paths = message[1].split('\x01') if message[1] else []
                 # Bind |paths| as a default argument: the job runs later, and
                 # a plain closure would see the value from a newer message
                 # once this loop has rebound the variable.
                 self._job_queue.put(
                     lambda paths=paths: self._worker_analyzer.AnalyzePaths(
                         paths))
             elif message[0] == _MSG_SORT_PATHS:
                 assert self._allow_analyze_paths, (
                     'Cannot call SortPaths() after AnalyzeStringLiterals()s.'
                 )
                 self._job_queue.put(self._worker_analyzer.SortPaths)
             elif message[0] == _MSG_ANALYZE_STRINGS:
                 self._WaitForAnalyzePathJobs()
                 elf_path, string_positions = message[1:]
                 # Same early binding as above, for the same reason.
                 self._job_queue.put(
                     lambda elf_path=elf_path,
                     string_positions=string_positions:
                     self._worker_analyzer.AnalyzeStringLiterals(
                         elf_path, string_positions))
             elif message[0] == _MSG_GET_SYMBOL_NAMES:
                 self._WaitForAnalyzePathJobs()
                 self._pipe.send(None)
                 paths_by_name = self._worker_analyzer.GetSymbolNames()
                 self._pipe.send(
                     concurrent.EncodeDictOfLists(paths_by_name))
             elif message[0] == _MSG_GET_STRINGS:
                 self._job_queue.join()
                 # Send a None packet so that other side can measure IPC transfer time.
                 self._pipe.send(None)
                 self._pipe.send(
                     self._worker_analyzer.GetEncodedStringPositions())
     except EOFError:
         pass
     except EnvironmentError as e:
         # Parent process exited so don't log.
         if e.errno in (errno.EPIPE, errno.ECONNRESET):
             sys.exit(1)
         # Any other environment error is unexpected; do not hide it.
         raise
예제 #12
0
def _BatchCollectNames(target, tool_prefix, output_directory):
    """Runs nm on |target| and returns the parsed output, encoded, by path.

    Args:
      target: Either a single archive path (a string) or a list of object
          file paths.
      tool_prefix: Prefix prepended to 'nm' to form the tool's path.
      output_directory: Working directory for the nm subprocess.

    Returns:
      ('', '') when nm prints nothing; otherwise the result of
      concurrent.EncodeDictOfLists() on a dict keyed by path.
    """
    is_archive = isinstance(target, basestring)
    # Ensure tool_prefix is absolute so that CWD does not affect it
    if os.path.sep in tool_prefix:
        # Use abspath() on the dirname to avoid it stripping a trailing /.
        prefix_dir = os.path.dirname(tool_prefix)
        prefix_rest = tool_prefix[len(prefix_dir):]
        tool_prefix = os.path.abspath(prefix_dir) + prefix_rest

    nm_command = [
        tool_prefix + 'nm',
        '--no-sort',
        '--defined-only',
        '--demangle',
    ]
    nm_command.extend([target] if is_archive else target)
    nm_lines = subprocess.check_output(
        nm_command, cwd=output_directory).splitlines()
    if not nm_lines:
        return '', ''

    # Output for multiple files begins with an empty line, followed by a
    # "<path>:" header line.
    starts_with_blank = not nm_lines[0]
    line_iter = iter(nm_lines)
    if starts_with_blank:
        next(line_iter)
        current_path = next(line_iter)[:-1]  # Strip the trailing colon.
    else:
        assert not is_archive
        current_path = target[0]

    names_by_path = {}
    while True:
        if is_archive:
            # E.g. foo/bar.a(baz.o)
            key = '%s(%s)' % (target, current_path)
        else:
            key = current_path
        names_by_path[key] = _ParseOneObjectFileOutput(line_iter)
        # Once the lines run out, ':' turns into an empty path.
        current_path = next(line_iter, ':')[:-1]
        if not current_path:
            break

    # The multiprocess API uses pickle, which is ridiculously slow. More than 2x
    # faster to use join & split.
    return concurrent.EncodeDictOfLists(names_by_path)
예제 #13
0
 def testEncodeDictOfLists_EmptyValue(self):
     """A key mapped to an empty list must survive an encode/decode cycle."""
     test_dict = {'foo': []}
     encoded = concurrent.EncodeDictOfLists(test_dict)
     decoded = concurrent.DecodeDictOfLists(encoded)
     # assertEqual() replaces the deprecated assertEquals() alias.
     self.assertEqual(test_dict, decoded)
예제 #14
0
def _CollectAliasesByAddressAsyncHelper(elf_path, tool_prefix):
    """Returns CollectAliasesByAddress() output as an encoded dict of lists.

    Keys are converted with str() while encoding.
    """
    return concurrent.EncodeDictOfLists(
        CollectAliasesByAddress(elf_path, tool_prefix), key_transform=str)
예제 #15
0
 def testEncodeDictOfLists_Join_Singl(self):
     """Joining a single encoded dict must decode to the original dict."""
     test_dict1 = {'key1': ['a']}
     encoded1 = concurrent.EncodeDictOfLists(test_dict1)
     encoded = concurrent.JoinEncodedDictOfLists([encoded1])
     decoded = concurrent.DecodeDictOfLists(encoded)
     # assertEqual() replaces the deprecated assertEquals() alias.
     self.assertEqual(test_dict1, decoded)
예제 #16
0
 def testEncodeDictOfLists_AllStrings(self):
     """A dict of string lists must survive an encode/decode cycle."""
     test_dict = {'foo': ['a', 'b', 'c'], 'foo2': ['a', 'b']}
     encoded = concurrent.EncodeDictOfLists(test_dict)
     decoded = concurrent.DecodeDictOfLists(encoded)
     # assertEqual() replaces the deprecated assertEquals() alias.
     self.assertEqual(test_dict, decoded)
예제 #17
0
 def testEncodeDictOfLists_ValueTransform(self):
     """Decoding with value_transform=int converts every value to an int."""
     test_dict = {'a': ['0', '1', '2'], 'b': ['3', '4']}
     expected = {'a': [0, 1, 2], 'b': [3, 4]}
     encoded = concurrent.EncodeDictOfLists(test_dict)
     decoded = concurrent.DecodeDictOfLists(encoded, value_transform=int)
     # assertEqual() replaces the deprecated assertEquals() alias.
     self.assertEqual(expected, decoded)
예제 #18
0
 def testEncodeDictOfLists_KeyTransform(self):
     """Integer keys encoded via str must decode back via key_transform=int."""
     test_dict = {0: ['a', 'b', 'c'], 9: ['a', 'b']}
     encoded = concurrent.EncodeDictOfLists(test_dict, key_transform=str)
     decoded = concurrent.DecodeDictOfLists(encoded, key_transform=int)
     # assertEqual() replaces the deprecated assertEquals() alias.
     self.assertEqual(test_dict, decoded)