Beispiel #1
0
def main():
    """Check the symbol order of a binary against an orderfile.

    Command line: ``[options] <binary> <orderfile>``. When --target-arch is
    not given, the architecture comes from
    cygprofile_utils.DetectArchitecture().

    Returns:
      1 when the arguments are invalid or _VerifySymbolOrder() returns a
      falsy value, 0 otherwise.
    """
    parser = optparse.OptionParser(
        usage='usage: %prog [options] <binary> <orderfile>')
    parser.add_option('--target-arch',
                      action='store',
                      dest='arch',
                      choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
                      help='The target architecture for the binary.')
    parser.add_option(
        '--threshold',
        action='store',
        dest='threshold',
        default=20,
        type=int,
        help='The maximum allowed number of out-of-order symbols.')
    options, argv = parser.parse_args(sys.argv)
    if not options.arch:
        options.arch = cygprofile_utils.DetectArchitecture()
    # argv still holds the program name, hence 3 entries for 2 positionals.
    if len(argv) != 3:
        parser.print_help()
        return 1
    (binary_filename, orderfile_filename) = argv[1:]

    symbol_extractor.SetArchitecture(options.arch)
    symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename)

    # Read the orderfile through a context manager so the handle is closed
    # deterministically (the former file() call leaked it and only exists in
    # Python 2).
    with open(orderfile_filename) as orderfile:
        orderfile_symbols = [line.strip() for line in orderfile]

    if not _VerifySymbolOrder(orderfile_symbols, symbol_infos,
                              options.threshold):
        return 1
    return 0
def main():
  """Count the misordered symbols of a binary relative to an orderfile.

  Command line: ``[options] <binary> <orderfile>``. When --target-arch is not
  given, the architecture comes from cygprofile_utils.DetectArchitecture().

  Returns:
    A truthy value (failure) when the arguments are invalid, when the
    misordered pair count reported by _CountMisorderedSymbols() exceeds
    --threshold, or when its matched symbol count is 0; a falsy value
    otherwise.
  """
  parser = optparse.OptionParser(usage=
      'usage: %prog [options] <binary> <orderfile>')
  parser.add_option('--target-arch', action='store', dest='arch',
                    choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
                    help='The target architecture for the binary.')
  # type='int' is required: without it a threshold passed on the command line
  # stays a string, and comparing it with the integer count below is
  # meaningless (and raises TypeError on Python 3).
  parser.add_option('--threshold', action='store', dest='threshold', default=1,
                    type='int',
                    help='The maximum allowed number of out-of-order symbols.')
  options, argv = parser.parse_args(sys.argv)
  if not options.arch:
    options.arch = cygprofile_utils.DetectArchitecture()
  # argv still holds the program name, hence 3 entries for 2 positionals.
  if len(argv) != 3:
    parser.print_help()
    return 1
  (binary_filename, orderfile_filename) = argv[1:]

  symbol_extractor.SetArchitecture(options.arch)
  obj_dir = cygprofile_utils.GetObjDir(binary_filename)
  symbol_to_sections_map = \
      cyglog_to_orderfile.GetSymbolToSectionsMapFromObjectFiles(obj_dir)
  section_to_symbols_map = cygprofile_utils.InvertMapping(
      symbol_to_sections_map)
  symbols = patch_orderfile.GetSymbolsFromOrderfile(orderfile_filename,
                                                    section_to_symbols_map)
  symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename)
  # Missing symbols is not an error since some of them can be eliminated through
  # inlining.
  (misordered_pairs_count, matched_symbols, _) = _CountMisorderedSymbols(
      symbols, symbol_infos)
  return (misordered_pairs_count > options.threshold) or (matched_symbols == 0)
def MatchSymbolsInRegularBuild(reached_symbol_infos,
                               regular_native_lib_filename):
    """Map reached symbols onto the primary symbols of the regular build.

    Only reached symbols whose name also exists in the regular build are kept;
    each one is translated through SymbolNameToPrimary().

    Args:
      reached_symbol_infos: ([symbol_extractor.SymbolInfo]) Symbols reached in
        the instrumented build.
      regular_native_lib_filename: (str) Filename of the regular build.

    Returns:
      [symbol_extractor.SymbolInfo] List of matched canonical symbols.
    """
    canonical_infos = symbol_extractor.SymbolInfosFromBinary(
        regular_native_lib_filename)
    canonical_names = {info.name for info in canonical_infos}
    reached_names = {info.name for info in reached_symbol_infos}
    logging.info('Reached symbols = %d', len(reached_names))
    common_names = reached_names & canonical_names
    logging.info('Matched symbols = %d', len(common_names))

    primary_by_name = SymbolNameToPrimary(canonical_infos)
    return [primary_by_name[name] for name in reached_names
            if name in common_names]
Beispiel #4
0
def main():
    """Attribute code pages of a native library to symbols and object files.

    Writes 'map.txt' and 'map.json' (plus 'reached.json' and 'residency.json'
    when the matching arguments are given) into args.output_directory, copies
    the visualization assets next to them and optionally serves the directory
    over HTTP.

    Returns:
      1 if the unstripped native library cannot be found, 0 otherwise. When
      args.start_server is set the function blocks in serve_forever().
    """
    parser = CreateArgumentParser()
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    symbol_extractor.SetArchitecture(args.arch)
    logging.info('Parsing object files in %s', args.build_directory)
    object_files_symbols = _GetSymbolNameToFilename(args.build_directory)
    native_lib_filename = os.path.join(args.build_directory, 'lib.unstripped',
                                       args.native_library)
    if not os.path.exists(native_lib_filename):
        logging.error('Native library not found. Did you build the APK?')
        return 1

    offset = 0
    if args.residency:
        with open(args.residency) as f:
            residency = json.load(f)
            offset = residency['offset']

    logging.info('Extracting symbols from %s', native_lib_filename)
    native_lib_symbols = symbol_extractor.SymbolInfosFromBinary(
        native_lib_filename)
    logging.info('%d Symbols found', len(native_lib_symbols))
    logging.info('Mapping symbols and object files to code pages')
    page_to_symbols = CodePagesToMangledSymbols(native_lib_symbols, offset)
    page_to_object_files = CodePagesToObjectFiles(object_files_symbols,
                                                  page_to_symbols)

    # The output directory must exist before the first file is written into
    # it; previously 'reached.json' was written before the directory was
    # created.
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory)

    if args.reached_symbols_file:
        logging.info('Mapping reached symbols to code pages')
        reached_symbol_names = ReadReachedSymbols(args.reached_symbols_file)
        reached_data = CodePagesToReachedSize(reached_symbol_names,
                                              page_to_symbols)
        WriteReachedData(os.path.join(args.output_directory, 'reached.json'),
                         reached_data)

    text_output_filename = os.path.join(args.output_directory, 'map.txt')
    json_output_filename = os.path.join(args.output_directory, 'map.json')
    WriteCodePageAttribution(page_to_object_files, text_output_filename,
                             json_output_filename)
    directory = os.path.dirname(__file__)

    # The residency file is copied once; it does not depend on the asset loop.
    if args.residency:
        shutil.copy(args.residency,
                    os.path.join(args.output_directory, 'residency.json'))
    for filename in ['visualize.html', 'visualize.js', 'visualize.css']:
        shutil.copy(os.path.join(directory, filename),
                    os.path.join(args.output_directory, filename))

    if args.start_server:
        # SimpleHTTPRequestHandler serves the current working directory.
        os.chdir(args.output_directory)
        httpd = SocketServer.TCPServer(
            ('', args.port), SimpleHTTPServer.SimpleHTTPRequestHandler)
        logging.warning('Serving on port %d', args.port)
        httpd.serve_forever()

    return 0
def _GroupSymbolInfosFromBinary(binary_filename):
    """Extract the symbols of a binary and group them by name and offset.

    The grouping itself is delegated to _GroupSymbolInfos().

    Args:
      binary_filename: path to the binary.

    Returns:
      A tuple of dict:
      (offset_to_symbol_infos, name_to_symbol_infos):
      - offset_to_symbol_infos: {offset: [symbol_info1, ...]}
      - name_to_symbol_infos: {name: [symbol_info1, ...]}
    """
    return _GroupSymbolInfos(
        symbol_extractor.SymbolInfosFromBinary(binary_filename))
Beispiel #6
0
    def SymbolInfos(self):
        """Return the symbols of this processor's binary, ordered by offset.

        The list is extracted and sorted on the first call, then cached in
        self._symbol_infos and returned as-is on later calls.

        Returns:
          [symbol_extractor.SymbolInfo]
        """
        if self._symbol_infos is not None:
            return self._symbol_infos

        self._symbol_infos = symbol_extractor.SymbolInfosFromBinary(
            self._binary_filename)
        self._symbol_infos.sort(key=lambda info: info.offset)
        logging.info('%d symbols from %s', len(self._symbol_infos),
                     self._binary_filename)
        return self._symbol_infos
def GetOffsetToSymbolArray(instrumented_native_lib_filename):
    """Build the .text offset -> symbol mapping of a native library.

    Args:
      instrumented_native_lib_filename: (str) Native library filename.
                                        Has to be the instrumented version.

    Returns:
      [symbol_extractor.SymbolInfo or None] For every 4 bytes of the .text
      section, maps it to a symbol, or None.
    """
    extracted_infos = symbol_extractor.SymbolInfosFromBinary(
        instrumented_native_lib_filename)
    symbol_count = len(extracted_infos)
    logging.info('%d Symbols', symbol_count)
    return GetOffsetToSymbolInfo(extracted_infos)
Beispiel #8
0
def _GroupSymbolInfosFromBinary(binary_filename):
  """Group the suffix-stripped symbols of a binary by offset and by name.

  Every symbol name goes through RemoveSuffixes() before grouping.

  Args:
    binary_filename: path to the binary.

  Returns:
    A tuple of dict:
    (offset_to_symbol_infos, name_to_symbol_infos):
    - offset_to_symbol_infos: {offset: [symbol_info1, ...]}
    - name_to_symbol_infos: {name: [symbol_info1, ...]}
  """
  stripped_infos = []
  for info in symbol_extractor.SymbolInfosFromBinary(binary_filename):
    stripped_infos.append(info._replace(name=RemoveSuffixes(info.name)))
  by_offset = symbol_extractor.GroupSymbolInfosByOffset(stripped_infos)
  by_name = symbol_extractor.GroupSymbolInfosByName(stripped_infos)
  return (by_offset, by_name)
def _GroupSymbolsByOffset(binary_filename):
  """Produce a map symbol name -> all symbol names at same offset.

  Suffixes are stripped with RemoveSuffixes() before grouping.

  Args:
    binary_filename: path to the binary.

  Returns:
    {name: [name1, ...]} where the list holds the names of every symbol
    sharing the offset of `name` (including `name` itself). When a name
    occurs at several offsets, the last one seen wins.
  """
  symbol_infos = [
      s._replace(name=RemoveSuffixes(s.name))
      for s in symbol_extractor.SymbolInfosFromBinary(binary_filename)]
  offset_map = symbol_extractor.GroupSymbolInfosByOffset(symbol_infos)
  sym_to_matching = {}
  for sym in symbol_infos:
    # Symbols whose offset is absent from the grouping are skipped. The
    # counter that used to track them was never read and has been removed.
    if sym.offset not in offset_map:
      continue
    matching = [s.name for s in offset_map[sym.offset]]
    assert sym.name in matching
    sym_to_matching[sym.name] = matching
  return sym_to_matching
Beispiel #10
0
def main():
  """Check the symbol order of a binary against an orderfile.

  Command line: ``[options] <binary> <orderfile>``. The --target-arch option
  is accepted but unused.

  Returns:
    1 when the arguments are invalid or _VerifySymbolOrder() returns a falsy
    value, 0 otherwise.
  """
  parser = optparse.OptionParser(usage=
      'usage: %prog [options] <binary> <orderfile>')
  parser.add_option('--target-arch', help='Unused')
  parser.add_option('--threshold',
                    action='store',
                    dest='threshold',
                    default=80,
                    type=int,
                    help='The maximum allowed number of out-of-order symbols.')
  options, argv = parser.parse_args(sys.argv)
  # argv still holds the program name, hence 3 entries for 2 positionals.
  if len(argv) != 3:
    parser.print_help()
    return 1
  (binary_filename, orderfile_filename) = argv[1:]

  symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename)

  # Read the orderfile through a context manager so the handle is closed
  # deterministically (the former file() call leaked it and only exists in
  # Python 2).
  with open(orderfile_filename) as orderfile:
    orderfile_symbols = [line.strip() for line in orderfile]

  if not _VerifySymbolOrder(orderfile_symbols, symbol_infos,
                            options.threshold):
    return 1
  return 0
Beispiel #11
0
def _ExtractAndProcessSymbols(native_library_filename):
    """Extracts, sorts and filters symbols.

    Symbols whose name starts with '__ThumbV7PILongThunk_' are dropped from
    the result; their count is logged.

    Args:
      native_library_filename: (str) Path to the native library

    Returns:
      [symbol_extractor.SymbolInfo], sorted by offset, without thunks.
    """
    logging.info('Extracting symbols')
    symbol_infos = symbol_extractor.SymbolInfosFromBinary(
        native_library_filename)
    # Arguments are passed to logging lazily so the message is only formatted
    # when the record is actually emitted.
    logging.info('%d symbols', len(symbol_infos))
    symbol_infos.sort(key=operator.attrgetter('offset'))
    real_symbols = [
        s for s in symbol_infos
        if not s.name.startswith('__ThumbV7PILongThunk_')
    ]
    logging.info('%d long jumps Thunks',
                 len(symbol_infos) - len(real_symbols))
    return real_symbols
Beispiel #12
0
def CodePagesToMangledSymbols(native_library_filename):
    """From the native library, groups the symbol per code page.

    A warning is logged for every page whose attributed sizes add up to more
    than _PAGE_SIZE.

    Args:
      native_library_filename: (str) Native library path.

    Returns:
      {offset: set((mangled_name, size_in_page), ...)} keyed by page start
      offset.
    """
    symbols = symbol_extractor.SymbolInfosFromBinary(native_library_filename)
    # Different symbols can be at the same address, through identical code folding
    # for instance. In this case, only keep the first one. This is not ideal, as
    # file attribution will be incorrect in this case. However ICF mostly works
    # with small symbols, so it shouldn't impact numbers too much.
    result = collections.defaultdict(set)
    known_offsets = set()
    for s in symbols:
        assert s.offset % 2 == 0, 'Wrong alignment'
        if s.offset in known_offsets:
            continue
        known_offsets.add(s.offset)
        start, end = s.offset, s.offset + s.size
        # `end` is exclusive. The last page touched is the one holding the last
        # byte of the symbol; using `end` itself attributed a 0-byte entry to
        # the following page whenever a symbol ended exactly on a page
        # boundary. max() keeps zero-sized symbols on their own page.
        # NOTE(review): assumes _PAGE_MASK clears the in-page offset bits
        # (i.e. ~(_PAGE_SIZE - 1)) -- confirm against its definition.
        last_page = max(start, end - 1) & _PAGE_MASK
        page = start & _PAGE_MASK
        while page <= last_page:
            symbol_start_in_page = max(page, start)
            symbol_end_in_page = min(page + _PAGE_SIZE, end)
            size_in_page = symbol_end_in_page - symbol_start_in_page
            result[page].add((s.name, size_in_page))
            page += _PAGE_SIZE
    for page, entries in result.items():
        total_size = sum(size for (_, size) in entries)
        if total_size > _PAGE_SIZE:
            # Floor division keeps the page index an integer on Python 3 too.
            logging.warning(
                'Too many symbols in page (%d * 4k)! Total size: %d',
                page // _PAGE_SIZE, total_size)
    return result
Beispiel #13
0
def _GroupLibrarySymbolInfosByOffset(lib_filename):
    """Extracts a library's symbols and returns them as {offset: [SymbolInfo]}."""
    return symbol_extractor.GroupSymbolInfosByOffset(
        symbol_extractor.SymbolInfosFromBinary(lib_filename))