예제 #1
0
def _ComputePakFileSymbols(
    file_name, contents, res_info, symbols_by_id, compression_ratio=1):
  """Creates/updates size symbols for one .pak file.

  Args:
    file_name: Path of the .pak file being processed.
    contents: Parsed pak data with a |resources| dict of id -> payload.
    res_info: Dict of resource_id -> (name, source_path).
    symbols_by_id: Output dict of resource_id (or |file_name| for the overhead
        entry) -> models.Symbol. Sizes accumulate into existing entries.
    compression_ratio: Multiplier applied to byte counts to account for the
        pak file being compressed within the apk.
  """
  id_map = {id(v): k
            for k, v in sorted(contents.resources.items(), reverse=True)}
  # Resources that share a payload object with a canonical id are aliases.
  # BUG FIX: use .items() instead of Python 2-only .iteritems(), consistent
  # with the .items() call above.
  alias_map = {k: id_map[id(v)] for k, v in contents.resources.items()
               if id_map[id(v)] != k}
  # Longest locale pak is es-419.pak
  if len(os.path.basename(file_name)) <= 9:
    section_name = models.SECTION_PAK_TRANSLATIONS
  else:
    section_name = models.SECTION_PAK_NONTRANSLATED
  overhead = (12 + 6) * compression_ratio  # Header size plus extra offset
  symbols_by_id[file_name] = models.Symbol(
      section_name, overhead, full_name='{}: overhead'.format(file_name))
  for resource_id in sorted(contents.resources):
    if resource_id in alias_map:
      # 4 extra bytes of metadata (2 16-bit ints)
      size = 4
      # Attribute the alias's size to its canonical resource.
      resource_id = alias_map[resource_id]
    else:
      # 6 extra bytes of metadata (1 32-bit int, 1 16-bit int)
      size = len(contents.resources[resource_id]) + 6
      name, source_path = res_info[resource_id]
      if resource_id not in symbols_by_id:
        full_name = '{}: {}'.format(source_path, name)
        symbols_by_id[resource_id] = models.Symbol(
            section_name, 0, address=resource_id, full_name=full_name)
    size *= compression_ratio
    symbols_by_id[resource_id].size += size
예제 #2
0
 def test_Diff_Clustering(self):
     """Symbols differing only by a numeric suffix should diff as unchanged."""
     size_info1 = self._CloneSizeInfo()
     size_info2 = self._CloneSizeInfo()
     S = '.text'
     size_info1.symbols += [
         models.Symbol(S, 11, name='.L__unnamed_1193',
                       object_path='a'),  # 1
         models.Symbol(S, 22, name='.L__unnamed_1194',
                       object_path='a'),  # 2
         models.Symbol(S, 33, name='.L__unnamed_1195',
                       object_path='b'),  # 3
         models.Symbol(S, 44, name='.L__bar_195', object_path='b'),  # 4
         models.Symbol(S, 55, name='.L__bar_1195', object_path='b'),  # 5
     ]
     size_info2.symbols += [
         models.Symbol(S, 33, name='.L__unnamed_2195',
                       object_path='b'),  # 3
         models.Symbol(S, 11, name='.L__unnamed_2194',
                       object_path='a'),  # 1
         models.Symbol(S, 22, name='.L__unnamed_2193',
                       object_path='a'),  # 2
         models.Symbol(S, 44, name='.L__bar_2195', object_path='b'),  # 4
         models.Symbol(S, 55, name='.L__bar_295', object_path='b'),  # 5
     ]
     d = diff.Diff(size_info1, size_info2)
     d.symbols = d.symbols.Sorted()
     # BUG FIX: assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(
         d.symbols.CountsByDiffStatus()[models.DIFF_STATUS_ADDED], 0)
     self.assertEqual(d.symbols.size, 0)
예제 #3
0
def _DiffSymbolGroups(before, after):
    """Matches |before| and |after| symbols and returns a DeltaSymbolGroup."""
    # Padding of changed symbols is zeroed out; keep per-section totals so the
    # information is not lost entirely.
    padding_by_section_name = collections.defaultdict(int)

    # Match in decreasing strictness. _Key1 is the exact-match pass and
    # usually claims >90% of symbols, so nearly all time is spent there.
    all_deltas = []
    for key_func in (_Key1, _Key2, _Key3, _Key4):
        delta_syms, before, after = _MatchSymbols(before, after, key_func,
                                                  padding_by_section_name)
        all_deltas.extend(delta_syms)

    logging.debug('Creating %d unmatched symbols', len(after) + len(before))
    all_deltas.extend(models.DeltaSymbol(None, sym) for sym in after)
    all_deltas.extend(models.DeltaSymbol(sym, None) for sym in before)

    # Represent the zeroed-out padding of matched symbols in aggregate.
    for section_name, padding in padding_by_section_name.items():
        if padding != 0:
            pad_sym = models.Symbol(section_name, padding)
            # This runs after _NormalizeNames(), so set |full_name|,
            # |template_name|, and |name| in one go.
            pad_sym.SetName("Overhead: aggregate padding of diff'ed symbols")
            pad_sym.padding = padding
            all_deltas.append(models.DeltaSymbol(None, pad_sym))

    return models.DeltaSymbolGroup(all_deltas)
예제 #4
0
def _ComputePakFileSymbols(
    file_name, file_size, contents, res_info, symbols_by_name):
  """Creates/updates pak symbols keyed by resource name.

  Args:
    file_name: Path of the .pak file being processed.
    file_size: Expected total size; asserted against the accounted bytes.
    contents: Parsed pak data with a |resources| dict of id -> payload.
    res_info: Dict of resource_id -> (name, source_path).
    symbols_by_name: Output dict of name -> models.Symbol; sizes accumulate.
  """
  total = 12 + 6  # Header size plus extra offset
  id_map = {id(v): k
            for k, v in sorted(contents.resources.items(), reverse=True)}
  # Resources that share a payload object with a canonical id are aliases.
  # BUG FIX: use .items() instead of Python 2-only .iteritems(), consistent
  # with the .items() call above.
  alias_map = {k: id_map[id(v)] for k, v in contents.resources.items()
               if id_map[id(v)] != k}
  # Longest locale pak is es-419.pak
  if len(os.path.basename(file_name)) <= 9:
    section_name = models.SECTION_PAK_TRANSLATIONS
  else:
    section_name = models.SECTION_PAK_NONTRANSLATED
  object_path = path_util.ToSrcRootRelative(file_name)
  for resource_id in sorted(contents.resources):
    if resource_id in alias_map:
      # 4 extra bytes of metadata (2 16-bit ints)
      size = 4
      name = res_info[alias_map[resource_id]][0]
    else:
      # 6 extra bytes of metadata (1 32-bit int, 1 16-bit int)
      size = len(contents.resources[resource_id]) + 6
      name, source_path = res_info[resource_id]
      if name not in symbols_by_name:
        full_name = '{}: {}'.format(source_path, name)
        symbols_by_name[name] = models.Symbol(
            section_name, 0, address=resource_id, full_name=full_name,
            source_path=source_path, object_path=object_path)
    symbols_by_name[name].size += size
    total += size
  assert file_size == total, (
      '{} bytes in pak file not accounted for'.format(file_size - total))
예제 #5
0
def _DiffSymbolGroups(before, after):
    """Matches |before| and |after| symbols and returns a DeltaSymbolGroup."""
    # For changed symbols, padding is zeroed out. In order to not lose the
    # information entirely, store it in aggregate.
    padding_by_section_name = collections.defaultdict(int)

    # Usually >90% of symbols are exact matches, so all of the time is spent in
    # this first pass.
    all_deltas, before, after = _MatchSymbols(before, after, _ExactMatchKey,
                                              padding_by_section_name)
    for key_func in (_GoodMatchKey, _PoorMatchKey):
        delta_syms, before, after = _MatchSymbols(before, after, key_func,
                                                  padding_by_section_name)
        all_deltas.extend(delta_syms)

    logging.debug('Creating %d unmatched symbols', len(after) + len(before))
    for after_sym in after:
        all_deltas.append(models.DeltaSymbol(None, after_sym))
    for before_sym in before:
        all_deltas.append(models.DeltaSymbol(before_sym, None))

    # Create a DeltaSymbol to represent the zero'd out padding of matched symbols.
    # BUG FIX: dict.iteritems() is Python 2-only; use items() for Python 3.
    for section_name, padding in padding_by_section_name.items():
        if padding != 0:
            after_sym = models.Symbol(
                section_name,
                padding,
                name="** aggregate padding of diff'ed symbols")
            after_sym.padding = padding
            all_deltas.append(models.DeltaSymbol(None, after_sym))

    return models.DeltaSymbolGroup(all_deltas)
예제 #6
0
def CreateDexSymbols(apk_path, output_directory):
    """Creates dex symbols from flattened apkanalyzer output."""
    apk_name = os.path.basename(apk_path)
    source_map = _LoadSourceMap(apk_name, output_directory)
    nodes = UndoHierarchicalSizing(_RunApkAnalyzer(apk_path, output_directory))
    dex_expected_size = _ExpectedDexTotalSize(apk_path)
    total_node_size = sum(node_size for _, node_size in nodes)
    assert dex_expected_size >= total_node_size, (
        'Node size too large, check for node processing errors.')
    # We have more than 100KB of ids for methods, strings
    id_metadata_overhead_size = dex_expected_size - total_node_size

    def _NodeToSymbol(name, node_size):
        # First space-separated token is the (possibly nested) class name.
        package = name.split(' ', 1)[0]
        source_path = source_map.get(package.split('$')[0], '')
        if source_path:
            object_path = package
        elif package == _TOTAL_NODE_NAME:
            name = '* Unattributed Dex'
            object_path = os.path.join(apk_name, _DEX_PATH_COMPONENT)
            # Fold the id-table overhead into the unattributed node.
            node_size += id_metadata_overhead_size
        else:
            object_path = os.path.join(apk_name, _DEX_PATH_COMPONENT,
                                       *package.split('.'))
        # Method signatures end with ')'; everything else is class/field data.
        section_name = (models.SECTION_DEX_METHOD
                        if name.endswith(')') else models.SECTION_DEX)
        return models.Symbol(section_name,
                             node_size,
                             full_name=name,
                             object_path=object_path,
                             source_path=source_path)

    return [_NodeToSymbol(name, node_size) for name, node_size in nodes]
예제 #7
0
def CreateDexSymbol(name, size, source_map, lambda_normalizer):
    """Returns a Symbol for one dex node, or None for the synthetic total node."""
    parts = name.split(' ')  # (class_name, return_type, method_name)
    new_package = parts[0]

    if new_package == _TOTAL_NODE_NAME:
        return None

    # Make d8 desugared lambdas look the same as Desugar ones.
    outer_class, name = lambda_normalizer.Normalize(new_package, name)

    # Look for class merging. len(parts) == 2 for class nodes.
    old_package = new_package
    if len(parts) > 2:
        method = parts[2]
        paren_idx = method.find('(')  # -1 for fields, which is fine.
        dot_idx = method.rfind('.', 0, paren_idx)
        if dot_idx != -1:
            old_package = method[:dot_idx]
            outer_class, name = lambda_normalizer.Normalize(old_package, name)

    # Method signatures end with ')'; everything else is class/field data.
    section_name = (models.SECTION_DEX_METHOD
                    if name.endswith(')') else models.SECTION_DEX)
    return models.Symbol(
        section_name,
        size,
        full_name=name,
        object_path=posixpath.join(models.APK_PREFIX_PATH,
                                   *old_package.split('.')),
        source_path=source_map.get(outer_class, ''))
예제 #8
0
def _AddNmAliases(raw_symbols, names_by_address):
  """Adds symbols that were removed by identical code folding.

  Args:
    raw_symbols: List of models.Symbol, extended in-place with new entries.
    names_by_address: Dict of address -> list of full_names folded there.
  """
  # Step 1: Create list of (index_of_symbol, name_list).
  logging.debug('Creating alias list')
  replacements = []
  num_new_symbols = 0
  missing_names = collections.defaultdict(list)
  for i, s in enumerate(raw_symbols):
    # Don't alias padding-only symbols (e.g. ** symbol gap)
    if s.size_without_padding == 0:
      continue
    name_list = names_by_address.get(s.address)
    if name_list:
      if s.full_name not in name_list:
        missing_names[s.full_name].append(s.address)
        logging.warning('Name missing from aliases: %s %s', s.full_name,
                        name_list)
        continue
      replacements.append((i, name_list))
      num_new_symbols += len(name_list) - 1

  if missing_names and logging.getLogger().isEnabledFor(logging.INFO):
    # BUG FIX: dict.iteritems() is Python 2-only; use items() for Python 3.
    for address, names in names_by_address.items():
      for name in names:
        if name in missing_names:
          logging.info('Missing name %s is at address %x instead of [%s]' %
              (name, address, ','.join('%x' % a for a in missing_names[name])))

  if float(num_new_symbols) / len(raw_symbols) < .05:
    logging.warning('Number of aliases is oddly low (%.0f%%). It should '
                    'usually be around 25%%. Ensure --tool-prefix is correct. ',
                    float(num_new_symbols) / len(raw_symbols) * 100)

  # Step 2: Create new symbols as siblings to each existing one.
  logging.debug('Creating %d new symbols from nm output', num_new_symbols)
  src_cursor_end = len(raw_symbols)
  raw_symbols += [None] * num_new_symbols
  dst_cursor_end = len(raw_symbols)
  # Process replacements back-to-front so each suffix chunk moves only once.
  for src_index, name_list in reversed(replacements):
    # Copy over symbols that come after the current one.
    chunk_size = src_cursor_end - src_index - 1
    dst_cursor_end -= chunk_size
    src_cursor_end -= chunk_size
    raw_symbols[dst_cursor_end:dst_cursor_end + chunk_size] = (
        raw_symbols[src_cursor_end:src_cursor_end + chunk_size])
    sym = raw_symbols[src_index]
    src_cursor_end -= 1

    # Create symbols (does not bother reusing the existing symbol).
    for i, full_name in enumerate(name_list):
      dst_cursor_end -= 1
      # Do not set |aliases| in order to avoid being pruned by
      # _CompactLargeAliasesIntoSharedSymbols(), which assumes aliases differ
      # only by path. The field will be set afterwards by _ConnectNmAliases().
      raw_symbols[dst_cursor_end] = models.Symbol(
          sym.section_name, sym.size, address=sym.address, full_name=full_name)

  assert dst_cursor_end == src_cursor_end
예제 #9
0
def _AddSymbolAliases(raw_symbols, aliases_by_address):
    """Expands symbols folded by identical code folding into alias groups.

    Args:
        raw_symbols: List of models.Symbol, extended in-place; new aliases are
            inserted directly after the symbol they alias.
        aliases_by_address: Dict of address -> list of full_names folded there.
    """
    # Step 1: Create list of (index_of_symbol, name_list).
    logging.debug('Creating alias list')
    replacements = []
    num_new_symbols = 0
    for i, s in enumerate(raw_symbols):
        # Don't alias padding-only symbols (e.g. ** symbol gap)
        if s.size_without_padding == 0:
            continue
        name_list = aliases_by_address.get(s.address)
        if name_list:
            if s.full_name not in name_list:
                # Inconsistent data: the symbol's own name should be listed.
                logging.warning('Name missing from aliases: %s %s',
                                s.full_name, name_list)
                continue
            replacements.append((i, name_list))
            # One alias already exists (the symbol itself).
            num_new_symbols += len(name_list) - 1

    if float(num_new_symbols) / len(raw_symbols) < .05:
        # TODO(agrieve): Figure out if there's a way to get alias information from
        # clang-compiled nm.
        logging.warning(
            'Number of aliases is oddly low (%.0f%%). It should '
            'usually be around 25%%. Ensure --tool-prefix is correct. '
            'Ignore this if you compiled with clang.',
            float(num_new_symbols) / len(raw_symbols) * 100)

    # Step 2: Create new symbols as siblings to each existing one.
    # The list is grown once, then filled back-to-front with two cursors so
    # each unaffected suffix chunk is moved at most once.
    logging.debug('Creating %d aliases', num_new_symbols)
    src_cursor_end = len(raw_symbols)
    raw_symbols += [None] * num_new_symbols
    dst_cursor_end = len(raw_symbols)
    for src_index, name_list in reversed(replacements):
        # Copy over symbols that come after the current one.
        chunk_size = src_cursor_end - src_index - 1
        dst_cursor_end -= chunk_size
        src_cursor_end -= chunk_size
        raw_symbols[dst_cursor_end:dst_cursor_end +
                    chunk_size] = (raw_symbols[src_cursor_end:src_cursor_end +
                                               chunk_size])
        sym = raw_symbols[src_index]
        # Skip past the original symbol; it is replaced by its alias group.
        src_cursor_end -= 1

        # Create aliases (does not bother reusing the existing symbol).
        # All aliases share the same |aliases| list object.
        aliases = [None] * len(name_list)
        for i, full_name in enumerate(name_list):
            aliases[i] = models.Symbol(sym.section_name,
                                       sym.size,
                                       address=sym.address,
                                       full_name=full_name,
                                       aliases=aliases)

        dst_cursor_end -= len(aliases)
        raw_symbols[dst_cursor_end:dst_cursor_end + len(aliases)] = aliases

    # Both cursors meet exactly when every slot has been filled.
    assert dst_cursor_end == src_cursor_end
예제 #10
0
def _DiffSymbolGroups(before, after):
    """Recursively diffs two symbol groups and returns a SymbolDiff."""
    before_symbols_by_key = collections.defaultdict(list)
    for s in before:
        before_symbols_by_key[_SymbolKey(s)].append(s)

    similar = []
    diffed_symbol_by_after_aliases = {}
    matched_before_aliases = set()
    unmatched_after_syms = []
    # For similar symbols, padding is zeroed out. In order to not lose the
    # information entirely, store it in aggregate.
    padding_by_section_name = collections.defaultdict(int)

    # Step 1: Create all delta symbols and record unmatched symbols.
    for after_sym in after:
        matching_syms = before_symbols_by_key.get(_SymbolKey(after_sym))
        if matching_syms:
            before_sym = matching_syms.pop(0)
            if before_sym.IsGroup() and after_sym.IsGroup():
                similar.append(_DiffSymbolGroups(before_sym, after_sym))
            else:
                if before_sym.aliases:
                    matched_before_aliases.add(id(before_sym.aliases))
                similar.append(
                    _DiffSymbol(before_sym, after_sym,
                                diffed_symbol_by_after_aliases,
                                padding_by_section_name))
        else:
            unmatched_after_syms.append(after_sym)
            continue

    # Step 2: Copy symbols only in "after" (being careful with aliases).
    added = _CloneUnmatched(unmatched_after_syms,
                            diffed_symbol_by_after_aliases)

    # Step 3: Negate symbols only in "before" (being careful with aliases).
    removed = []
    negated_symbol_by_before_aliases = {}
    # BUG FIX: dict.itervalues() is Python 2-only; use values() for Python 3.
    for remaining_syms in before_symbols_by_key.values():
        removed.extend(
            _NegateAndClone(remaining_syms, matched_before_aliases,
                            negated_symbol_by_before_aliases))

    # Step 4: Create ** symbols to represent padding differences.
    # BUG FIX: dict.iteritems() is Python 2-only; use items() for Python 3.
    for section_name, padding in padding_by_section_name.items():
        if padding != 0:
            similar.append(
                models.Symbol(section_name,
                              padding,
                              name="** aggregate padding of diff'ed symbols"))
    return models.SymbolDiff(added,
                             removed,
                             similar,
                             name=after.name,
                             full_name=after.full_name,
                             section_name=after.section_name)
예제 #11
0
def _AssignNmAliasPathsAndCreatePathAliases(raw_symbols, object_paths_by_name):
    """Assigns object paths from nm data, cloning symbols found in multiple .o files."""
    num_found_paths = 0
    num_unknown_names = 0
    num_path_mismatches = 0
    num_aliases_created = 0
    ret = []
    for sym in raw_symbols:
        ret.append(sym)
        full_name = sym.full_name
        # Skip bss, unnamed symbols, ** merge symbols / .Lswitch.table names,
        # and the special 'startup' symbol.
        if (sym.IsBss() or not full_name or full_name[0] in '*.'
                or full_name == 'startup'):
            continue

        object_paths = object_paths_by_name.get(full_name)
        if not object_paths:
            if num_unknown_names < 10:
                logging.warning('Symbol not found in any .o files: %r', sym)
            num_unknown_names += 1
            continue
        num_found_paths += 1

        if sym.object_path and sym.object_path not in object_paths:
            # Cap logging so a systemic mismatch doesn't flood the output.
            if num_path_mismatches < 10:
                logging.warning(
                    'Symbol path reported by .map not found by nm.')
                logging.warning('sym=%r', sym)
                logging.warning('paths=%r', object_paths)
            object_paths.append(sym.object_path)
            object_paths.sort()
            num_path_mismatches += 1

        sym.object_path = object_paths[0]

        extra_paths = object_paths[1:]
        if extra_paths:
            # Create one symbol per remaining object_path; all of them share a
            # single |aliases| list.
            aliases = sym.aliases or [sym]
            sym.aliases = aliases
            num_aliases_created += len(extra_paths)
            for object_path in extra_paths:
                new_sym = models.Symbol(sym.section_name,
                                        sym.size,
                                        address=sym.address,
                                        full_name=full_name,
                                        object_path=object_path,
                                        aliases=aliases)
                aliases.append(new_sym)
                ret.append(new_sym)

    logging.debug(
        'Cross-referenced %d symbols with nm output. '
        'num_unknown_names=%d num_path_mismatches=%d '
        'num_aliases_created=%d', num_found_paths, num_unknown_names,
        num_path_mismatches, num_aliases_created)
    return ret
예제 #12
0
def _MakeSym(section, size, path, name=None):
    """Builds a test Symbol, synthesizing a name when none is supplied."""
    # Trailing letter is important since diffing trims numbers.
    sym_name = name if name is not None else '{}_{}A'.format(section[1:], size)
    return models.Symbol(
        section, size, full_name=sym_name, template_name=sym_name,
        name=sym_name, object_path=path)
예제 #13
0
def _ParseApkOtherSymbols(section_sizes, apk_path):
  """Creates SECTION_OTHER symbols for apk entries not counted elsewhere."""
  apk_symbols = []
  compressed_total = 0
  with zipfile.ZipFile(apk_path) as apk:
    for info in apk.infolist():
      compressed_total += info.compress_size
      # Skip shared library and pak files as they are already accounted for.
      if info.filename.endswith(('.so', '.pak')):
        continue
      apk_symbols.append(
          models.Symbol(models.SECTION_OTHER, info.compress_size,
                        full_name=info.filename))
  # Bytes not attributable to any entry are zip bookkeeping overhead.
  overhead_size = os.path.getsize(apk_path) - compressed_total
  apk_symbols.append(
      models.Symbol(models.SECTION_OTHER, overhead_size,
                    full_name='Overhead: APK file'))
  prev = section_sizes.setdefault(models.SECTION_OTHER, 0)
  section_sizes[models.SECTION_OTHER] = prev + sum(s.size for s in apk_symbols)
  return apk_symbols
예제 #14
0
def _AddNmAliases(raw_symbols, names_by_address):
  """Adds symbols that were removed by identical code folding.

  Args:
    raw_symbols: List of models.Symbol (left unmodified).
    names_by_address: Dict of address -> list of full_names folded there.

  Returns:
    A new list containing |raw_symbols| with alias symbols inserted directly
    after each symbol they alias.
  """
  # Step 1: Create list of (index_of_symbol, name_list).
  logging.debug('Creating alias list')
  replacements = []
  num_new_symbols = 0
  missing_names = collections.defaultdict(list)
  for i, s in enumerate(raw_symbols):
    # Don't alias padding-only symbols (e.g. ** symbol gap)
    if s.size_without_padding == 0:
      continue
    name_list = names_by_address.get(s.address)
    if name_list:
      if s.full_name not in name_list:
        missing_names[s.full_name].append(s.address)
        logging.warning('Name missing from aliases: %s %s', s.full_name,
                        name_list)
        continue
      replacements.append((i, name_list))
      num_new_symbols += len(name_list) - 1

  if missing_names and logging.getLogger().isEnabledFor(logging.INFO):
    # BUG FIX: dict.iteritems() is Python 2-only; use items() for Python 3.
    for address, names in names_by_address.items():
      for name in names:
        if name in missing_names:
          logging.info('Missing name %s is at address %x instead of [%s]' %
              (name, address, ','.join('%x' % a for a in missing_names[name])))

  if float(num_new_symbols) / len(raw_symbols) < .05:
    logging.warning('Number of aliases is oddly low (%.0f%%). It should '
                    'usually be around 25%%. Ensure --tool-prefix is correct. ',
                    float(num_new_symbols) / len(raw_symbols) * 100)

  # Step 2: Create new symbols as siblings to each existing one.
  logging.debug('Creating %d new symbols from nm output', num_new_symbols)
  expected_num_symbols = len(raw_symbols) + num_new_symbols
  ret = []
  prev_src = 0
  for cur_src, name_list in replacements:
    ret += raw_symbols[prev_src:cur_src]
    prev_src = cur_src + 1
    sym = raw_symbols[cur_src]
    # Create symbols (|sym| gets recreated and discarded).
    new_syms = []
    for full_name in name_list:
      # Do not set |aliases| in order to avoid being pruned by
      # _CompactLargeAliasesIntoSharedSymbols(), which assumes aliases differ
      # only by path. The field will be set afterwards by _ConnectNmAliases().
      new_syms.append(models.Symbol(
          sym.section_name, sym.size, address=sym.address, full_name=full_name))
    ret += new_syms
  ret += raw_symbols[prev_src:]
  assert expected_num_symbols == len(ret)
  return ret
예제 #15
0
 def test_Diff_Clustering(self):
   """Symbols differing only by a numeric suffix should diff as unchanged."""
   size_info1 = self._CloneSizeInfo()
   size_info2 = self._CloneSizeInfo()
   S = '.text'
   size_info1.symbols += [
       models.Symbol(S, 11, name='.L__unnamed_1193', object_path='a'), # 1
       models.Symbol(S, 22, name='.L__unnamed_1194', object_path='a'), # 2
       models.Symbol(S, 33, name='.L__unnamed_1195', object_path='b'), # 3
       models.Symbol(S, 44, name='.L__bar_195', object_path='b'), # 4
       models.Symbol(S, 55, name='.L__bar_1195', object_path='b'), # 5
   ]
   size_info2.symbols += [
       models.Symbol(S, 33, name='.L__unnamed_2195', object_path='b'), # 3
       models.Symbol(S, 11, name='.L__unnamed_2194', object_path='a'), # 1
       models.Symbol(S, 22, name='.L__unnamed_2193', object_path='a'), # 2
       models.Symbol(S, 44, name='.L__bar_2195', object_path='b'), # 4
       models.Symbol(S, 55, name='.L__bar_295', object_path='b'), # 5
   ]
   d = diff.Diff(size_info1, size_info2)
   # BUG FIX: assertEquals is a deprecated alias of assertEqual.
   self.assertEqual(d.symbols.added_count, 0)
   self.assertEqual(d.symbols.size, 0)
예제 #16
0
def _CloneSymbol(sym, size):
    """Returns a new Symbol mirroring |sym| but with the given |size|.

    Padding and aliases are deliberately not carried over.
    """
    return models.Symbol(
        sym.section_name,
        size,
        address=sym.address,
        full_name=sym.full_name,
        name=sym.name,
        object_path=sym.object_path,
        source_path=sym.source_path,
        flags=sym.flags)
예제 #17
0
def _ParseApkOtherSymbols(section_sizes, apk_path, apk_so_path):
  """Creates SECTION_OTHER symbols for apk entries not counted elsewhere."""
  apk_name = os.path.basename(apk_path)
  apk_symbols = []
  compressed_total = 0
  with zipfile.ZipFile(apk_path) as apk:
    for info in apk.infolist():
      compressed_total += info.compress_size
      # Skip main shared library, pak, and dex files as they are accounted for.
      if (info.filename == apk_so_path
          or info.filename.endswith(('.dex', '.pak'))):
        continue
      entry_path = os.path.join(apk_name, 'other', info.filename)
      apk_symbols.append(
          models.Symbol(models.SECTION_OTHER, info.compress_size,
                        object_path=entry_path,
                        full_name=os.path.basename(info.filename)))
  # Bytes not attributable to any entry are zip bookkeeping overhead.
  overhead_size = os.path.getsize(apk_path) - compressed_total
  assert overhead_size >= 0, 'Apk overhead must be non-negative'
  apk_symbols.append(
      models.Symbol(models.SECTION_OTHER, overhead_size,
                    full_name='Overhead: APK file'))
  prev = section_sizes.setdefault(models.SECTION_OTHER, 0)
  section_sizes[models.SECTION_OTHER] = prev + sum(s.size for s in apk_symbols)
  return apk_symbols
예제 #18
0
def _AddSymbolAliases(raw_symbols, aliases_by_address):
    """Expands symbols folded by identical code folding into alias groups.

    Args:
        raw_symbols: List of models.Symbol, extended in-place; new aliases are
            inserted directly after the symbol they alias.
        aliases_by_address: Dict of address -> list of names folded there.
    """
    # Step 1: Create list of (index_of_symbol, name_list).
    logging.debug('Creating alias list')
    replacements = []
    num_new_symbols = 0
    for i, s in enumerate(raw_symbols):
        # Don't alias padding-only symbols (e.g. ** symbol gap)
        if s.size_without_padding == 0:
            continue
        name_list = aliases_by_address.get(s.address)
        if name_list:
            if s.name not in name_list:
                # Inconsistent data: the symbol's own name should be listed.
                logging.warning('Name missing from aliases: %s %s', s.name,
                                name_list)
                continue
            replacements.append((i, name_list))
            # One alias already exists (the symbol itself).
            num_new_symbols += len(name_list) - 1

    # Step 2: Create new symbols as siblings to each existing one.
    # The list is grown once, then filled back-to-front with two cursors so
    # each unaffected suffix chunk is moved at most once.
    logging.debug('Creating %d aliases', num_new_symbols)
    src_cursor_end = len(raw_symbols)
    raw_symbols += [None] * num_new_symbols
    dst_cursor_end = len(raw_symbols)
    for src_index, name_list in reversed(replacements):
        # Copy over symbols that come after the current one.
        chunk_size = src_cursor_end - src_index - 1
        dst_cursor_end -= chunk_size
        src_cursor_end -= chunk_size
        raw_symbols[dst_cursor_end:dst_cursor_end +
                    chunk_size] = (raw_symbols[src_cursor_end:src_cursor_end +
                                               chunk_size])
        sym = raw_symbols[src_index]
        # Skip past the original symbol; it is replaced by its alias group.
        src_cursor_end -= 1

        # Create aliases (does not bother reusing the existing symbol).
        # All aliases share the same |aliases| list object.
        aliases = [None] * len(name_list)
        for i, name in enumerate(name_list):
            aliases[i] = models.Symbol(sym.section_name,
                                       sym.size,
                                       address=sym.address,
                                       name=name,
                                       aliases=aliases)

        dst_cursor_end -= len(aliases)
        raw_symbols[dst_cursor_end:dst_cursor_end + len(aliases)] = aliases

    # Both cursors meet exactly when every slot has been filled.
    assert dst_cursor_end == src_cursor_end
예제 #19
0
def CreateDexSymbols(apk_path, mapping_path, size_info_prefix):
    """Creates dex symbols by running apkanalyzer and flattening its tree."""
    source_map = _ParseJarInfoFile(size_info_prefix + '.jar.info')

    nodes = UndoHierarchicalSizing(_RunApkAnalyzer(apk_path, mapping_path))

    dex_expected_size = _ExpectedDexTotalSize(apk_path)
    total_node_size = sum(node_size for _, _, node_size in nodes)
    # TODO(agrieve): Figure out why this log is triggering for
    #     ChromeModernPublic.apk (https://crbug.com/851535).
    # Reporting: dex_expected_size=6546088 total_node_size=6559549
    if dex_expected_size < total_node_size:
        logging.error(
            'Node size too large, check for node processing errors. '
            'dex_expected_size=%d total_node_size=%d', dex_expected_size,
            total_node_size)
    # We have more than 100KB of ids for methods, strings
    id_metadata_overhead_size = dex_expected_size - total_node_size
    symbols = []
    for _, name, node_size in nodes:
        # First space-separated token is the (possibly nested) class name.
        package = name.split(' ', 1)[0]
        source_path = source_map.get(package.split('$')[0], '')
        if source_path:
            object_path = package
        elif package == _TOTAL_NODE_NAME:
            name = '* Unattributed Dex'
            object_path = ''  # Categorize in the anonymous section.
            node_size += id_metadata_overhead_size
        else:
            object_path = os.path.join(models.APK_PREFIX_PATH,
                                       *package.split('.'))
        # Method signatures end with ')'; everything else is class/field data.
        section_name = (models.SECTION_DEX_METHOD
                        if name.endswith(')') else models.SECTION_DEX)
        symbols.append(
            models.Symbol(section_name, node_size, full_name=name,
                          object_path=object_path, source_path=source_path))
    return symbols
예제 #20
0
  def _ParseCommonSymbols(self):
    """Parses the 'Common symbol' table into a list of .bss Symbols."""
    # Common symbol       size              file
    #
    # ff_cos_131072       0x40000           obj/third_party/<snip>
    # ff_cos_131072_fixed
    #                     0x20000           obj/third_party/<snip>
    next(self._lines)  # Skip past blank line

    ret = []
    for line in self._lines:
      parts = self._ParsePossiblyWrappedParts(line, 3)
      if not parts:
        break
      name, size_str, path = parts
      # Sizes are printed as 0x-prefixed hex.
      ret.append(
          models.Symbol(models.SECTION_BSS, int(size_str[2:], 16),
                        full_name=name, object_path=path))
    return ret
예제 #21
0
    def _ParseCommonSymbols(self):
        """Appends a '.bss' Symbol for each row of the common-symbol table."""
        # Common symbol       size              file
        #
        # ff_cos_131072       0x40000           obj/third_party/<snip>
        # ff_cos_131072_fixed
        #                     0x20000           obj/third_party/<snip>
        self._SkipToLineWithPrefix('Common symbol')
        next(self._lines)  # Skip past blank line

        for line in self._lines:
            parts = self._ParsePossiblyWrappedParts(line, 3)
            if not parts:
                break
            sym_name, size_str, obj_path = parts
            # Sizes are printed as 0x-prefixed hex.
            self._symbols.append(
                models.Symbol('.bss', int(size_str[2:], 16),
                              name=sym_name, object_path=obj_path))
예제 #22
0
파일: archive.py 프로젝트: aixliu/chromium
def _ParsePakSymbols(section_sizes, object_paths, output_directory,
                     symbols_by_id):
    """Attributes pak symbols to object paths and integerizes their sizes.

    Reads each |path|.whitelist file under |output_directory| to learn which
    object file references which resource id, truncates fractional symbol
    sizes (attributing the leftover to a synthetic translations symbol), and
    accumulates per-section totals into |section_sizes|.

    Returns:
      The pak symbols sorted by (section_name, address).
    """
    for path in object_paths:
        whitelist_path = os.path.join(output_directory, path + '.whitelist')
        has_whitelist = (os.path.exists(whitelist_path)
                         and os.path.getsize(whitelist_path) > 0)
        if not has_whitelist:
            continue
        with open(whitelist_path, 'r') as f:
            for line in f:
                resource_id = int(line.rstrip())
                # There may be object files in static libraries that are
                # removed by the linker when there are no external references
                # to its symbols. These files may be included in object_paths
                # which our apk does not use, resulting in resource_ids that
                # don't end up being in the final apk.
                if resource_id in symbols_by_id:
                    symbols_by_id[resource_id].object_path = path

    raw_symbols = sorted(symbols_by_id.values(),
                         key=lambda s: (s.section_name, s.address))
    raw_total = 0.0
    int_total = 0
    for symbol in raw_symbols:
        raw_total += symbol.size
        # We truncate rather than round to ensure that we do not over
        # attribute. It is easier to add another symbol to make up the
        # difference.
        symbol.size = int(symbol.size)
        int_total += symbol.size
    # Attribute excess to translations since only those are compressed.
    leftover = int(round(raw_total - int_total))
    raw_symbols.append(
        models.Symbol(models.SECTION_PAK_TRANSLATIONS, leftover,
                      full_name='Pak compression leftover artifacts'))

    for symbol in raw_symbols:
        section_total = section_sizes.setdefault(symbol.section_name, 0)
        section_sizes[symbol.section_name] = section_total + symbol.size
    return raw_symbols
예제 #23
0
def _DiffSymbolGroups(containers, before, after):
    """Matches |before| and |after| symbols and returns a DeltaSymbolGroup.

    For changed symbols, padding is zeroed out. In order to not lose that
    information entirely, it is aggregated per "segment" — a
    (container name, section name) tuple — and re-emitted as one synthetic
    overhead DeltaSymbol per segment.
    """
    padding_by_segment = collections.defaultdict(float)

    # Usually >90% of symbols are exact matches, so nearly all of the time is
    # spent in the first (_Key1) pass.
    all_deltas = []
    for key_func in (_Key1, _Key2, _Key3, _Key4):
        matched, before, after = _MatchSymbols(before, after, key_func,
                                               padding_by_segment)
        all_deltas.extend(matched)

    logging.debug('Creating %d unmatched symbols', len(after) + len(before))
    all_deltas.extend(models.DeltaSymbol(None, sym) for sym in after)
    all_deltas.extend(models.DeltaSymbol(sym, None) for sym in before)

    container_from_name = {c.name: c for c in containers}

    # One DeltaSymbol per segment for the zero'd out padding of matched
    # symbols.
    for (container_name, section_name), padding in padding_by_segment.items():
        # Values need to be integer (crbug.com/1132394).
        padding = round(padding)
        if padding == 0:
            continue
        pad_sym = models.Symbol(section_name, padding)
        pad_sym.container = container_from_name[container_name]
        # This runs after _NormalizeNames(), so set |full_name|,
        # |template_name|, and |name| together via SetName().
        pad_sym.SetName("Overhead: aggregate padding of diff'ed symbols")
        pad_sym.padding = padding
        all_deltas.append(models.DeltaSymbol(None, pad_sym))

    return models.DeltaSymbolGroup(all_deltas)
예제 #24
0
from flaskapp import db
import models
from ImageFile import ImageFile
from features import feature_histogram, trim, zoning_method

zeros = ['0-00a.bmp', '0-00b.bmp', '0-00c.bmp', '0-00d.bmp', '0-00e.bmp']
ones = ['1-00a.bmp', '1-00b.bmp', '1-00c.bmp', '1-00d.bmp', '1-00e.bmp']
for j in range(2):
    if j == 0:
        s = models.Symbol(name="zero")
    else:
        s = models.Symbol(name="one")
    db.session.add(s)
    db.session.commit()

    for i in range(5):
        if j == 0:
            path = "./images/zero/" + zeros[i]
        else:
            path = "./images/one/" + ones[i]

        img = ImageFile(path)
        trimmed = trim(img)
        img_vector = zoning_method(trimmed)

        for k in range(16):
            if k == 0:
                v = models.V1(histogram_value=img_vector[k], number=s)
            elif k == 1:
                v = models.V2(histogram_value=img_vector[k], number=s)
            elif k == 2:
예제 #25
0
    def _ParseSections(self):
        """Parses per-section symbol listings from a GNU ld linker map.

        Records the size of every section seen in self._section_sizes, and
        appends a models.Symbol to self._symbols for each entry found inside
        .text/.rodata/.data*/.bss. Gaps left by merge sections and trailing
        space at the end of a section are recorded as zero-sized
        '** symbol gap' symbols so they show up as padding downstream.
        """
        # .text           0x0028c600  0x22d3468
        #  .text.startup._GLOBAL__sub_I_bbr_sender.cc
        #                 0x0028c600       0x38 obj/net/net/bbr_sender.o
        #  .text._reset   0x00339d00       0xf0 obj/third_party/icu/icuuc/ucnv.o
        #  ** fill        0x0255fb00   0x02
        #  .text._ZN4base8AutoLockD2Ev
        #                 0x00290710        0xe obj/net/net/file_name.o
        #                 0x00290711                base::AutoLock::~AutoLock()
        #                 0x00290711                base::AutoLock::~AutoLock()
        # .text._ZNK5blink15LayoutBlockFlow31mustSeparateMarginAfterForChildERK...
        #                0xffffffffffffffff       0x46 obj/...
        #                0x006808e1                blink::LayoutBlockFlow::...
        # .bss
        #  .bss._ZGVZN11GrProcessor11initClassIDI10LightingFPEEvvE8kClassID
        #                0x02d4b294        0x4 obj/skia/skia/SkLightingShader.o
        #                0x02d4b294   guard variable for void GrProcessor::initClassID
        # .data           0x0028c600  0x22d3468
        #  .data.rel.ro._ZTVN3gvr7android19ScopedJavaGlobalRefIP12_jfloatArrayEE
        #                0x02d1e668       0x10 ../../third_party/.../libfoo.a(bar.o)
        #                0x02d1e668   vtable for gvr::android::GlobalRef<_jfloatArray*>
        #  ** merge strings
        #                 0x0255fb00   0x1f2424
        #  ** merge constants
        #                 0x0255fb00   0x8
        # ** common      0x02db5700   0x13ab48
        syms = self._symbols  # Local alias; symbols are appended in place.
        while True:
            line = self._SkipToLineWithPrefix('.')
            if not line:
                break
            section_name = None
            try:
                # Parse section name and size.
                parts = self._ParsePossiblyWrappedParts(line, 3)
                if not parts:
                    break
                section_name, section_address_str, section_size_str = parts
                section_address = int(section_address_str[2:], 16)
                section_size = int(section_size_str[2:], 16)
                self._section_sizes[section_name] = section_size
                if (section_name in (models.SECTION_BSS, models.SECTION_RODATA,
                                     models.SECTION_TEXT)
                        or section_name.startswith(models.SECTION_DATA)):
                    logging.info('Parsing %s', section_name)
                    if section_name == models.SECTION_BSS:
                        # Common symbols have no address.
                        syms.extend(self._common_symbols)
                    prefix_len = len(
                        section_name) + 1  # + 1 for the trailing .
                    symbol_gap_count = 0
                    merge_symbol_start_address = section_address
                    sym_count_at_start = len(syms)
                    line = next(self._lines)
                    # Parse section symbols.
                    while True:
                        if not line or line.isspace():
                            break
                        if line.startswith(' **'):
                            # A '** fill' / '** merge ...' / '** common' entry.
                            zero_index = line.find('0')
                            if zero_index == -1:
                                # Line wraps.
                                name = line.strip()
                                line = next(self._lines)
                            else:
                                # Line does not wrap.
                                name = line[:zero_index].strip()
                                line = line[zero_index:]
                            address_str, size_str = self._ParsePossiblyWrappedParts(
                                line, 2)
                            line = next(self._lines)
                            # These bytes are already accounted for.
                            if name == '** common':
                                continue
                            address = int(address_str[2:], 16)
                            size = int(size_str[2:], 16)
                            path = None
                            sym = models.Symbol(section_name,
                                                size,
                                                address=address,
                                                full_name=name,
                                                object_path=path)
                            syms.append(sym)
                            if merge_symbol_start_address > 0:
                                merge_symbol_start_address += size
                        else:
                            # A normal symbol entry.
                            subsection_name, address_str, size_str, path = (
                                self._ParsePossiblyWrappedParts(line, 4))
                            size = int(size_str[2:], 16)
                            assert subsection_name.startswith(section_name), (
                                'subsection name was: ' + subsection_name)
                            mangled_name = subsection_name[prefix_len:]
                            name = None
                            address_str2 = None
                            # Consume the demangled-name continuation lines (if
                            # any) that follow the entry.
                            while True:
                                line = next(self._lines).rstrip()
                                if not line or line.startswith(' .'):
                                    break
                                # clang includes ** fill, but gcc does not.
                                if line.startswith(' ** fill'):
                                    # Alignment explicitly recorded in map file. Rather than
                                    # record padding based on these entries, we calculate it
                                    # using addresses. We do this because fill lines are not
                                    # present when compiling with gcc (only for clang).
                                    continue
                                elif line.startswith(' **'):
                                    break
                                elif name is None:
                                    address_str2, name = self._ParsePossiblyWrappedParts(
                                        line, 2)

                            if address_str == '0xffffffffffffffff':
                                # The section needs special handling (e.g., a merge section)
                                # It also generally has a large offset after it, so don't
                                # penalize the subsequent symbol for this gap (e.g. a 50kb gap).
                                # There seems to be no correlation between where these gaps occur
                                # and the symbols they come in-between.
                                # TODO(agrieve): Learn more about why this happens.
                                if address_str2:
                                    address = int(address_str2[2:], 16) - 1
                                elif syms and syms[-1].address > 0:
                                    # Merge sym with no second line showing real address.
                                    address = syms[-1].end_address
                                else:
                                    logging.warning(
                                        'First symbol of section had address -1'
                                    )
                                    address = 0

                                merge_symbol_start_address = address + size
                            else:
                                address = int(address_str[2:], 16)
                                # Finish off active address gap / merge section.
                                if merge_symbol_start_address:
                                    merge_size = address - merge_symbol_start_address
                                    merge_symbol_start_address = 0
                                    if merge_size > 0:
                                        # merge_size == 0 for the initial symbol generally.
                                        logging.debug(
                                            'Merge symbol of size %d found at:\n  %r',
                                            merge_size, syms[-1])
                                        # Set size=0 so that it will show up as padding.
                                        sym = models.Symbol(
                                            section_name,
                                            0,
                                            address=address,
                                            full_name='** symbol gap %d' %
                                            symbol_gap_count)
                                        symbol_gap_count += 1
                                        syms.append(sym)

                            #  .text.res_findResource_60
                            #                 0x00178de8       0x12a obj/...
                            #                 0x00178de9                res_findResource_60
                            #  .text._ZN3url6ParsedC2Ev
                            #                 0x0021ad62       0x2e obj/url/url/url_parse.o
                            #                 0x0021ad63                url::Parsed::Parsed()
                            #  .text.unlikely._ZN4base3CPUC2Ev
                            #                 0x003f9d3c       0x48 obj/base/base/cpu.o
                            #                 0x003f9d3d                base::CPU::CPU()
                            # Prefer the mangled name when it looks like a real
                            # C++ mangling or no demangled name was found.
                            full_name = name
                            if mangled_name and (not name or
                                                 mangled_name.startswith('_Z')
                                                 or '._Z' in mangled_name):
                                full_name = mangled_name

                            sym = models.Symbol(section_name,
                                                size,
                                                address=address,
                                                full_name=full_name,
                                                object_path=path)
                            syms.append(sym)
                    section_end_address = section_address + section_size
                    if section_name != models.SECTION_BSS and (
                            syms[-1].end_address < section_end_address):
                        # Set size=0 so that it will show up as padding.
                        sym = models.Symbol(
                            section_name,
                            0,
                            address=section_end_address,
                            full_name=('** symbol gap %d (end of section)' %
                                       symbol_gap_count))
                        syms.append(sym)
                    logging.debug('Symbol count for %s: %d', section_name,
                                  len(syms) - sym_count_at_start)
            except:
                # Intentionally broad: annotate the failing line and section
                # for debugging, then re-raise the original exception.
                logging.error('Problem line: %r', line)
                logging.error('In section: %r', section_name)
                raise
예제 #26
0
 def Create(self, *args, **kwargs):
     """Starts building a new symbol, finishing the previous one first.

     Calls self.Flush() (which presumably commits the symbol currently under
     construction — confirm against the class), then stores a fresh
     models.Symbol built from the forwarded |args|/|kwargs| in self.cur_sym.
     """
     self.Flush()
     self.cur_sym = models.Symbol(*args, **kwargs)
예제 #27
0
def _CreateMergeStringsReplacements(merge_string_syms,
                                    list_of_positions_by_object_path):
  """Creates replacement string-literal symbols for |merge_string_syms|.

  Args:
    merge_string_syms: Symbols for '** merge strings' sections.
    list_of_positions_by_object_path: Parallel to |merge_string_syms|; for
        each merge section, a dict of object_path -> list of (offset, size)
        positions of string literals within that section.

  Returns:
    A list parallel to |merge_string_syms|, each entry being the list of
    string-literal Symbols for that section, sorted, de-overlapped, deduped,
    and with alias lists connected.
  """
  ret = []
  STRING_LITERAL_NAME = models.STRING_LITERAL_NAME
  assert len(merge_string_syms) == len(list_of_positions_by_object_path)
  # zip()/.items() rather than the Python 2-only itertools.izip()/.iteritems()
  # so this works under both Python 2 and Python 3.
  tups = zip(merge_string_syms, list_of_positions_by_object_path)
  for merge_sym, positions_by_object_path in tups:
    merge_sym_address = merge_sym.address
    new_symbols = []
    ret.append(new_symbols)
    for object_path, positions in positions_by_object_path.items():
      for offset, size in positions:
        address = merge_sym_address + offset
        symbol = models.Symbol(
            models.SECTION_RODATA, size, address, STRING_LITERAL_NAME,
            object_path=object_path)
        new_symbols.append(symbol)

  logging.debug('Created %d string literal symbols', sum(len(x) for x in ret))
  logging.debug('Sorting string literals')
  for symbols in ret:
    # In order to achieve a total ordering in the presence of aliases, need to
    # include both |address| and |object_path|.
    # In order to achieve consistent deduping, need to include |size|.
    symbols.sort(key=lambda x: (x.address, -x.size, x.object_path))

  logging.debug('Deduping string literals')
  num_removed = 0
  size_removed = 0
  num_aliases = 0
  for i, symbols in enumerate(ret):
    if not symbols:
      continue
    prev_symbol = symbols[0]
    new_symbols = [prev_symbol]
    for symbol in symbols[1:]:
      padding = symbol.address - prev_symbol.end_address
      if (prev_symbol.address == symbol.address and
          prev_symbol.size == symbol.size):
        # String is an alias.
        num_aliases += 1
        aliases = prev_symbol.aliases
        if aliases:
          aliases.append(symbol)
          symbol.aliases = aliases
        else:
          aliases = [prev_symbol, symbol]
          prev_symbol.aliases = aliases
          symbol.aliases = aliases
      elif padding + symbol.size <= 0:
        # String is a substring of prior one.
        num_removed += 1
        size_removed += symbol.size
        continue
      elif padding < 0:
        # String overlaps previous one. Adjust to not overlap.
        symbol.address -= padding
        symbol.size += padding
      new_symbols.append(symbol)
      prev_symbol = symbol
    ret[i] = new_symbols
    # Aliases come out in random order, so sort to be deterministic.
    ret[i].sort(key=lambda s: (s.address, s.object_path))

  logging.debug(
      'Removed %d overlapping string literals (%d bytes) & created %d aliases',
      num_removed, size_removed, num_aliases)
  return ret
예제 #28
0
def CreateSectionSizesAndSymbols(
      map_path=None, tool_prefix=None, output_directory=None, elf_path=None,
      apk_path=None, track_string_literals=True, metadata=None,
      apk_elf_result=None, pak_files=None, pak_info_file=None,
      knobs=SectionSizeKnobs()):
  # NOTE(review): the default |knobs| is evaluated once at import time and
  # shared across calls — fine only if SectionSizeKnobs is effectively
  # immutable; confirm.
  """Creates sections sizes and symbols for a SizeInfo.

  Args:
    map_path: Path to the linker .map(.gz) file to parse.
    elf_path: Path to the corresponding unstripped ELF file. Used to find symbol
        aliases and inlined functions. Can be None.
    tool_prefix: Prefix for c++filt & nm (required).
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
    apk_path: Path to an .apk to extract pak and "other" symbols from. Can be
        None.
    track_string_literals: Whether to break down "** merge string" sections into
        smaller symbols (requires output_directory).
    metadata: Forwarded to _ParseApkElfSectionSize() when |apk_path| is set.
    apk_elf_result: Forwarded to _ParseApkElfSectionSize() when |apk_path| is
        set.
    pak_files: .pak files to find pak symbols in (used together with
        |pak_info_file| when |apk_path| is not given).
    pak_info_file: Info file describing |pak_files|.
    knobs: SectionSizeKnobs forwarded to the pak/alias helpers.

  Returns:
    A (section_sizes, raw_symbols) tuple.
  """
  source_mapper = None
  elf_object_paths = None
  if output_directory:
    # Start by finding the elf_object_paths, so that nm can run on them while
    # the linker .map is being parsed.
    logging.info('Parsing ninja files.')
    source_mapper, elf_object_paths = ninja_parser.Parse(
        output_directory, elf_path)
    logging.debug('Parsed %d .ninja files.', source_mapper.parsed_file_count)
    assert not elf_path or elf_object_paths, (
        'Failed to find link command in ninja files for ' +
        os.path.relpath(elf_path, output_directory))

  section_sizes, raw_symbols = _ParseElfInfo(
      map_path, elf_path, tool_prefix, output_directory, track_string_literals,
      elf_object_paths)
  elf_overhead_size = _CalculateElfOverhead(section_sizes, elf_path)

  pak_symbols_by_id = None
  if apk_path:
    pak_symbols_by_id = _FindPakSymbolsFromApk(apk_path, output_directory,
                                               knobs)
    section_sizes, elf_overhead_size = _ParseApkElfSectionSize(
        section_sizes, metadata, apk_elf_result)
    raw_symbols.extend(_ParseApkOtherSymbols(section_sizes, apk_path))
  elif pak_files and pak_info_file:
    pak_symbols_by_id = _FindPakSymbolsFromFiles(
        pak_files, pak_info_file, output_directory)

  if elf_path:
    # Account for ELF bytes not covered by any section (headers, etc.).
    elf_overhead_symbol = models.Symbol(
        models.SECTION_OTHER, elf_overhead_size, full_name='Overhead: ELF file')
    prev = section_sizes.setdefault(models.SECTION_OTHER, 0)
    section_sizes[models.SECTION_OTHER] = prev + elf_overhead_size
    raw_symbols.append(elf_overhead_symbol)

  if pak_symbols_by_id:
    object_paths = (p for p in source_mapper.IterAllPaths() if p.endswith('.o'))
    pak_raw_symbols = _ParsePakSymbols(
        section_sizes, object_paths, output_directory, pak_symbols_by_id)
    raw_symbols.extend(pak_raw_symbols)

  _ExtractSourcePathsAndNormalizeObjectPaths(raw_symbols, source_mapper)
  logging.info('Converting excessive aliases into shared-path symbols')
  _CompactLargeAliasesIntoSharedSymbols(raw_symbols, knobs)
  logging.debug('Connecting nm aliases')
  _ConnectNmAliases(raw_symbols)
  return section_sizes, raw_symbols
예제 #29
0
  def Parse(self, lines):
    """Parses a linker map file.

    Args:
      lines: Iterable of lines, the first of which has been consumed to
      identify file type.

    Returns:
      A tuple of (section_ranges, symbols, extras), where |extras| is a dict
      containing 'thin_map' (address -> Thin-LTO cache file basename).
    """
    # Newest format:
    #     VMA      LMA     Size Align Out     In      Symbol
    #     194      194       13     1 .interp
    #     194      194       13     1         <internal>:(.interp)
    #     1a8      1a8     22d8     4 .ARM.exidx
    #     1b0      1b0        8     4         obj/sandbox/syscall.o:(.ARM.exidx)
    #     400      400   123400    64 .text
    #     600      600       14     4         ...:(.text.OUTLINED_FUNCTION_0)
    #     600      600        0     1                 $x.3
    #     600      600       14     1                 OUTLINED_FUNCTION_0
    #  123800   123800    20000   256 .rodata
    #  123800   123800       4      4         ...:o:(.rodata._ZN3fooE.llvm.1234)
    #  123800   123800       4      1                 foo (.llvm.1234)
    #  123804   123804       4      4         ...:o:(.rodata.bar.llvm.1234)
    #  123804   123804       4      1                 bar.llvm.1234
    # Older format:
    # Address          Size             Align Out     In      Symbol
    # 00000000002002a8 000000000000001c     1 .interp
    # 00000000002002a8 000000000000001c     1         <internal>:(.interp)
    # ...
    # 0000000000201000 0000000000000202    16 .text
    # 0000000000201000 000000000000002a     1         /[...]/crt1.o:(.text)
    # 0000000000201000 0000000000000000     0                 _start
    # 000000000020102a 0000000000000000     1         /[...]/crti.o:(.text)
    # 0000000000201030 00000000000000bd    16         /[...]/crtbegin.o:(.text)
    # 0000000000201030 0000000000000000     0             deregister_tm_clones
    # 0000000000201060 0000000000000000     0             register_tm_clones
    # 00000000002010a0 0000000000000000     0             __do_global_dtors_aux
    # 00000000002010c0 0000000000000000     0             frame_dummy
    # 00000000002010ed 0000000000000071     1         a.o:(.text)
    # 00000000002010ed 0000000000000071     0             main
    syms = []
    cur_section = None
    cur_section_is_useful = False
    promoted_name_count = 0
    # |is_partial| indicates that an eligible Level 3 line should be used to
    # update |syms[-1].full_name| instead of creating a new symbol.
    is_partial = False
    # Assembly code can create consecutive Level 3 lines with |size == 0|. These
    # lines can represent
    #  (1) assembly functions (should form symbol), or
    #  (2) assembly labels (should NOT form symbol).
    # It seems (2) correlates with the presence of a leading Level 3 line with
    # |size > 0|. This gives rise to the following strategy: Each symbol S from
    # a Level 3 line suppresses Level 3 lines with |address| less than
    # |next_usable_address := S.address + S.size|.
    next_usable_address = 0

    # For Thin-LTO, a map from each address to the Thin-LTO cache file. This
    # provides hints downstream to identify object_paths for .L.ref.tmp symbols,
    # but is not useful in the final output. Therefore it's stored separately,
    # instead of being in Symbol.
    thin_map = {}

    tokenizer = self.Tokenize(lines)

    in_partitions = False
    in_jump_table = False
    jump_tables_count = 0
    jump_entries_count = 0

    for (line, address, size, level, span, tok) in tokenizer:
      # Level 1 data match the "Out" column. They specify sections or
      # PROVIDE_HIDDEN lines.
      if level == 1:
        # Ignore sections that belong to feature library partitions. Seeing a
        # partition name is an indicator that we've entered a list of feature
        # partitions. After these, a single .part.end section will follow to
        # reserve memory at runtime. Seeing the .part.end section also marks the
        # end of partition sections in the map file.
        if tok.endswith('_partition'):
          in_partitions = True
        elif tok == '.part.end':
          # Note that we want to retain .part.end section, so it's fine to
          # restart processing on this section, rather than the next one.
          in_partitions = False

        if in_partitions:
          # For now, completely ignore feature partitions.
          cur_section = None
          cur_section_is_useful = False
        else:
          if not tok.startswith('PROVIDE_HIDDEN'):
            self._section_ranges[tok] = (address, size)
          cur_section = tok
          # E.g., Want to convert "(.text._name)" -> "_name" later.
          mangled_start_idx = len(cur_section) + 2
          cur_section_is_useful = (
              cur_section in models.BSS_SECTIONS
              or cur_section in (models.SECTION_RODATA, models.SECTION_TEXT)
              or cur_section.startswith(models.SECTION_DATA))

      elif cur_section_is_useful:
        # Level 2 data match the "In" column. They specify object paths and
        # section names within objects, or '<internal>:...'.
        if level == 2:
          # E.g., 'path.o:(.text._name)' => ['path.o', '(.text._name)'].
          cur_obj, paren_value = tok.split(':')

          in_jump_table = '.L.cfi.jumptable' in paren_value
          if in_jump_table:
            # Store each CFI jump table as a Level 2 symbol, whose Level 3
            # details are discarded.
            jump_tables_count += 1
            cur_obj = ''  # Replaces 'lto.tmp' to prevent problem later.
            mangled_name = '** CFI jump table'
          else:
            # E.g., '(.text.unlikely._name)' -> '_name'.
            mangled_name = paren_value[mangled_start_idx:-1]
            cur_flags = _FlagsFromMangledName(mangled_name)
            is_partial = True
            # As of 2017/11 LLD does not distinguish merged strings from other
            # merged data. Feature request is filed under:
            # https://bugs.llvm.org/show_bug.cgi?id=35248
            if cur_obj == '<internal>':
              if cur_section == '.rodata' and mangled_name == '':
                # Treat all <internal> sections within .rodata as string
                # literals. Some may hold numeric constants or other data, but
                # there is currently no way to distinguish them.
                mangled_name = '** lld merge strings'
              else:
                # e.g. <internal>:(.text.thunk)
                mangled_name = '** ' + mangled_name

              is_partial = False
              cur_obj = None
            elif cur_obj == 'lto.tmp' or 'thinlto-cache' in cur_obj:
              thin_map[address] = os.path.basename(cur_obj)
              cur_obj = None

          # Create a symbol here since there may be no ensuing Level 3 lines.
          # But if there are, then the symbol can be modified later as sym[-1].
          # NOTE(review): |cur_flags| is only assigned on the non-jump-table
          # path above; a '.L.cfi.jumptable' Level 2 line appearing before any
          # normal one would hit an unbound local here — confirm that cannot
          # happen in practice.
          sym = models.Symbol(cur_section, size, address=address,
                              full_name=mangled_name, object_path=cur_obj,
                              flags=cur_flags)
          syms.append(sym)

          # Level 3 |address| is nested under Level 2, don't add |size|.
          next_usable_address = address

        # Level 3 data match the "Symbol" column. They specify symbol names or
        # special names such as '.L_MergeGlobals'. Annotations such as '$d',
        # '$t.42' also appear at Level 3, but they are consumed by |tokenizer|,
        # so don't appear here.
        elif level == 3:
          # Handle .L.cfi.jumptable.
          if in_jump_table:
            # Level 3 entries in CFI jump tables are thunks with mangled names.
            # Extracting them as symbols is not worthwhile; we only store the
            # Level 2 symbol, and print the count for verbose output. For
            # counting, '__typeid_' entries are excluded since they're likely
            # just annotations.
            if not tok.startswith('__typeid_'):
              jump_entries_count += 1
            continue

          # Ignore anything with '.L_MergedGlobals' prefix. This seems to only
          # happen for ARM (32-bit) builds.
          if tok.startswith('.L_MergedGlobals'):
            continue

          # Use |span| to decide whether to use a Level 3 line for Symbols. This
          # is useful for two purposes:
          # * This is a better indicator than |size|, which can be 0 for
          #   assembly functions.
          # * If multiple Level 3 lines have the same starting address, this
          #   cause all but the last line to have |span > 0|. This dedups lines
          #   with identical symbol names (why do they exist?). Note that this
          #   also skips legitimate aliases, but that's desired because nm.py
          #   (downstream) assumes no aliases already exist.
          if span > 0:
            stripped_tok = demangle.StripLlvmPromotedGlobalNames(tok)
            if len(tok) != len(stripped_tok):
              promoted_name_count += 1
              tok = stripped_tok
            tok = _NormalizeName(tok)

            # Handle special case where a partial symbol consumes bytes before
            # the first Level 3 symbol.
            if is_partial and syms[-1].address < address:
              # Truncate the partial symbol and leave it without |full_name|.
              # The data from the current line will form a new symbol.
              syms[-1].size = address - syms[-1].address
              next_usable_address = address
              is_partial = False

            if is_partial:
              syms[-1].full_name = tok
              syms[-1].size = size if size > 0 else min(syms[-1].size, span)
              next_usable_address = address + syms[-1].size
              is_partial = False
            elif address >= next_usable_address:
              if tok.startswith('__typeid_'):
                assert size == 1
                if tok.endswith('_byte_array'):
                  # CFI byte array table: |size| is inaccurate, so use |span|.
                  size_to_use = span
                else:
                  # Likely '_global_addr' or '_unique_member'. These should be:
                  # * Skipped since they're in CFI tables.
                  # * Suppressed (via |next_usable_address|) by another Level 3
                  #   symbol.
                  # Anything that makes it here would be an anomaly worthy of
                  # investigation, so print warnings.
                  # NOTE(review): logging.warn is a deprecated alias of
                  # logging.warning.
                  logging.warn('Unrecognized __typeid_ symbol at %08X', address)
                  continue
              else:
                # Prefer |size|, and only fall back to |span| if |size == 0|.
                size_to_use = size if size > 0 else span
              sym = models.Symbol(cur_section, size_to_use, address=address,
                                  full_name=tok, flags=cur_flags)
              syms.append(sym)

              # Suppress symbols with overlapping |address|. This eliminates
              # labels from assembly sources.
              next_usable_address = address + size_to_use
              if cur_obj is not None:
                syms[-1].object_path = cur_obj

        else:
          logging.error('Problem line: %r', line)

    if promoted_name_count:
      logging.info('Found %d promoted global names', promoted_name_count)
    if jump_tables_count:
      logging.info('Found %d CFI jump tables with %d total entries',
                   jump_tables_count, jump_entries_count)
    return self._section_ranges, syms, {'thin_map': thin_map}
예제 #30
0
  def Parse(self, lines):
    """Parses a linker map file into section sizes and symbols.

    Args:
      lines: Iterable of lines, the first of which has been consumed to
          identify file type.

    Returns:
      A tuple of (section_sizes, symbols), where section_sizes maps section
      names to byte sizes and symbols is a list of models.Symbol.
    """
# Newest format:
#     VMA      LMA     Size Align Out     In      Symbol
#     194      194       13     1 .interp
#     194      194       13     1         <internal>:(.interp)
#     1a8      1a8     22d8     4 .ARM.exidx
#     1b0      1b0        8     4         obj/sandbox/syscall.o:(.ARM.exidx)
#     400      400   123400    64 .text
#     600      600       14     4         obj/...:(.text.OUTLINED_FUNCTION_0)
#     600      600        0     1                 $x.3
#     600      600       14     1                 OUTLINED_FUNCTION_0
#  123800   123800    20000   256 .rodata
#  123800   123800       4      4         ...:o:(.rodata._ZN3fooE.llvm.1234)
#  123800   123800       4      1                 foo (.llvm.1234)
#  123804   123804       4      4         ...:o:(.rodata.bar.llvm.1234)
#  123804   123804       4      1                 bar.llvm.1234
# Older format:
# Address          Size             Align Out     In      Symbol
# 00000000002002a8 000000000000001c     1 .interp
# 00000000002002a8 000000000000001c     1         <internal>:(.interp)
# ...
# 0000000000201000 0000000000000202    16 .text
# 0000000000201000 000000000000002a     1         /[...]/crt1.o:(.text)
# 0000000000201000 0000000000000000     0                 _start
# 000000000020102a 0000000000000000     1         /[...]/crti.o:(.text)
# 0000000000201030 00000000000000bd    16         /[...]/crtbegin.o:(.text)
# 0000000000201030 0000000000000000     0                 deregister_tm_clones
# 0000000000201060 0000000000000000     0                 register_tm_clones
# 00000000002010a0 0000000000000000     0                 __do_global_dtors_aux
# 00000000002010c0 0000000000000000     0                 frame_dummy
# 00000000002010ed 0000000000000071     1         a.o:(.text)
# 00000000002010ed 0000000000000071     0                 main
    syms = []
    cur_section = None
    # Whether |cur_section| is one we care to extract symbols from (.bss,
    # .rodata, .text, or any .data* section). Symbols from other sections
    # are ignored entirely.
    cur_section_is_useful = None
    # Counts names rewritten by StripLlvmPromotedGlobalNames(), for logging.
    promoted_name_count = 0
    # A Level 2 line does not supply |full_name| data (unless '<internal>').
    # This would be taken from a Level 3 line. |is_partial| indicates that an
    # eligible Level 3 line should be used to update |syms[-1].full_name|
    # instead of creating a new symbol.
    is_partial = False
    # Assembly code can create consecutive Level 3 lines with |size == 0|. These
    # lines can represent
    #  (1) assembly functions (should form symbol), or
    #  (2) assembly labels (should NOT form symbol).
    # It seems (2) correlates with the presence of a leading Level 3 line with
    # |size > 0|. This gives rise to the following strategy: Each symbol S from
    # a Level 3 line suppresses Level 3 lines with |address| less than
    # |next_usable_address := S.address + S.size|.
    next_usable_address = 0

    tokenizer = self.Tokenize(lines)
    for (line, address, size, level, span, tok) in tokenizer:
      # Level 1 data match the "Out" column. They specify sections or
      # PROVIDE_HIDDEN lines.
      if level == 1:
        # PROVIDE_HIDDEN entries are not real output sections, so exclude
        # them from |_section_sizes| (but still track them as |cur_section|).
        if not tok.startswith('PROVIDE_HIDDEN'):
          self._section_sizes[tok] = size
        cur_section = tok
        # E.g., Want to convert "(.text._name)" -> "_name" later.
        # The "+ 2" skips the leading '(' and the '.' after the section name.
        mangled_start_idx = len(cur_section) + 2
        cur_section_is_useful = (
            cur_section in (models.SECTION_BSS,
                            models.SECTION_RODATA,
                            models.SECTION_TEXT) or
            cur_section.startswith(models.SECTION_DATA))

      elif cur_section_is_useful:
        # Level 2 data match the "In" column. They specify object paths and
        # section names within objects, or '<internal>:...'.
        if level == 2:
          # Create a symbol here since there may be no ensuing Level 3 lines.
          # But if there are, then the symbol can be modified later as sym[-1].
          syms.append(models.Symbol(cur_section, size, address=address))
          # E.g., 'path.o:(.text._name)' => ['path.o', '(.text._name)'].
          cur_obj, paren_value = tok.split(':')
          # E.g., '(.text._name)' -> '_name'.
          mangled_name = paren_value[mangled_start_idx:-1]
          # As of 2017/11 LLD does not distinguish merged strings from other
          # merged data. Feature request is filed under:
          # https://bugs.llvm.org/show_bug.cgi?id=35248
          if cur_obj == '<internal>':
            if cur_section == '.rodata' and mangled_name == '':
              # Treat all <internal> sections within .rodata as string
              # literals. Some may hold numeric constants or other data, but
              # there is currently no way to distinguish them.
              syms[-1].full_name = '** lld merge strings'
            else:
              # e.g. <internal>:(.text.thunk)
              syms[-1].full_name = '** ' + mangled_name
            cur_obj = None
          elif cur_obj == 'lto.tmp' or 'thinlto-cache' in cur_obj:
            # LTO temporaries are not meaningful object paths; drop them.
            cur_obj = None
          if cur_obj is not None:
            syms[-1].object_path = cur_obj

          # A Level 2 symbol without a name so far expects an ensuing Level 3
          # line to fill in |full_name| (see |is_partial| note above).
          is_partial = not bool(syms[-1].full_name)
          # Level 3 |address| is nested under Level 2, don't add |size|.
          next_usable_address = address

        # Level 3 data match the "Symbol" column. They specify symbol names or
        # special names such as '.L_MergedGlobals'. Annotations such as '$d',
        # '$t.42' also appear at Level 3, but they are consumed by |tokenizer|,
        # so don't appear here.
        elif level == 3:
          # Ignore anything with '.L_MergedGlobals' prefix. This seems to only
          # happen for ARM (32-bit) builds.
          if tok.startswith('.L_MergedGlobals'):
            continue

          # Use |span| to decide whether to use a Level 3 line for Symbols. This
          # is useful for two purposes:
          # * This is a better indicator than |size|, which can be 0 for
          #   assembly functions.
          # * If multiple Level 3 lines have the same starting address, this
          #   causes all but the last line to have |span > 0|. This dedups lines
          #   with identical symbol names (why do they exist?). Note that this
          #   also skips legitimate aliases, but that's desired because nm.py
          #   (downstream) assumes no aliases already exist.
          if span > 0:
            # Outlined functions have names like OUTLINED_FUNCTION_0, which can
            # appear 1000+ time, and can cause false aliasing. We treat these as
            # special cases by designating them as a placeholder symbols and
            # renaming them to '** outlined function'.
            if tok.startswith('OUTLINED_FUNCTION_'):
              tok = '** outlined function'
            stripped_tok = demangle.StripLlvmPromotedGlobalNames(tok)
            if len(tok) != len(stripped_tok):
              promoted_name_count += 1
              tok = stripped_tok

            # Handle special case where a partial symbol consumes bytes before
            # the first Level 3 symbol.
            if is_partial and syms[-1].address < address:
              # Truncate the partial symbol and leave it without |full_name|.
              # The data from the current line will form a new symbol.
              syms[-1].size = address - syms[-1].address
              next_usable_address = address
              is_partial = False

            if is_partial:
              # Fill in the name of the pending Level 2 symbol rather than
              # creating a new one.
              syms[-1].full_name = tok
              syms[-1].size = size if size > 0 else min(syms[-1].size, span)
              next_usable_address = address + syms[-1].size
              is_partial = False
            elif address >= next_usable_address:
              # Prefer |size|, and only fall back to |span| if |size == 0|.
              size_to_use = size if size > 0 else span
              syms.append(
                  models.Symbol(
                      cur_section, size_to_use, address=address, full_name=tok))
              # Suppress symbols with overlapping |address|. This eliminates
              # labels from assembly sources.
              next_usable_address = address + size_to_use
              if cur_obj is not None:
                syms[-1].object_path = cur_obj

        else:
          logging.error('Problem line: %r', line)

    if promoted_name_count:
      logging.info('Found %d promoted global names', promoted_name_count)
    return self._section_sizes, syms