Example #1
0
  def Parse(self, lines):
    """Parses a linker map file.

    Walks the records produced by |self.Tokenize(lines)|. Level 1 records
    (sections) are stored in |self._section_ranges|; Level 2 and Level 3
    records inside "useful" sections (BSS sections, .rodata, .text, and
    anything starting with the data section prefix) are turned into
    models.Symbol instances. Sections belonging to feature library partitions
    are ignored.

    Args:
      lines: Iterable of lines, the first of which has been consumed to
        identify file type.

    Returns:
      A tuple of (section_ranges, symbols, extras), where:
      * section_ranges is |self._section_ranges|, mapping each section name to
        an (address, size) tuple.
      * symbols is a list of models.Symbol.
      * extras is a dict with the single key 'thin_map', which maps the
        address of each 'lto.tmp' / Thin-LTO cache Level 2 line to the
        basename of that object file.
    """
    # Newest format:
    #     VMA      LMA     Size Align Out     In      Symbol
    #     194      194       13     1 .interp
    #     194      194       13     1         <internal>:(.interp)
    #     1a8      1a8     22d8     4 .ARM.exidx
    #     1b0      1b0        8     4         obj/sandbox/syscall.o:(.ARM.exidx)
    #     400      400   123400    64 .text
    #     600      600       14     4         ...:(.text.OUTLINED_FUNCTION_0)
    #     600      600        0     1                 $x.3
    #     600      600       14     1                 OUTLINED_FUNCTION_0
    #  123800   123800    20000   256 .rodata
    #  123800   123800       4      4         ...:o:(.rodata._ZN3fooE.llvm.1234)
    #  123800   123800       4      1                 foo (.llvm.1234)
    #  123804   123804       4      4         ...:o:(.rodata.bar.llvm.1234)
    #  123804   123804       4      1                 bar.llvm.1234
    # Older format:
    # Address          Size             Align Out     In      Symbol
    # 00000000002002a8 000000000000001c     1 .interp
    # 00000000002002a8 000000000000001c     1         <internal>:(.interp)
    # ...
    # 0000000000201000 0000000000000202    16 .text
    # 0000000000201000 000000000000002a     1         /[...]/crt1.o:(.text)
    # 0000000000201000 0000000000000000     0                 _start
    # 000000000020102a 0000000000000000     1         /[...]/crti.o:(.text)
    # 0000000000201030 00000000000000bd    16         /[...]/crtbegin.o:(.text)
    # 0000000000201030 0000000000000000     0             deregister_tm_clones
    # 0000000000201060 0000000000000000     0             register_tm_clones
    # 00000000002010a0 0000000000000000     0             __do_global_dtors_aux
    # 00000000002010c0 0000000000000000     0             frame_dummy
    # 00000000002010ed 0000000000000071     1         a.o:(.text)
    # 00000000002010ed 0000000000000071     0             main
    syms = []
    cur_section = None
    cur_section_is_useful = False
    promoted_name_count = 0
    # |is_partial| indicates that an eligible Level 3 line should be used to
    # update |syms[-1].full_name| instead of creating a new symbol.
    is_partial = False
    # Assembly code can create consecutive Level 3 lines with |size == 0|. These
    # lines can represent
    #  (1) assembly functions (should form symbol), or
    #  (2) assembly labels (should NOT form symbol).
    # It seems (2) correlates with the presence of a leading Level 3 line with
    # |size > 0|. This gives rise to the following strategy: Each symbol S from
    # a Level 3 line suppresses Level 3 lines with |address| less than
    # |next_usable_address := S.address + S.size|.
    next_usable_address = 0

    # For Thin-LTO, a map from each address to the Thin-LTO cache file. This
    # provides hints downstream to identify object_paths for .L.ref.tmp symbols,
    # but is not useful in the final output. Therefore it's stored separately,
    # instead of being in Symbol.
    thin_map = {}

    tokenizer = self.Tokenize(lines)

    in_partitions = False
    in_jump_table = False
    jump_tables_count = 0
    jump_entries_count = 0

    for (line, address, size, level, span, tok) in tokenizer:
      # Level 1 data match the "Out" column. They specify sections or
      # PROVIDE_HIDDEN lines.
      if level == 1:
        # Ignore sections that belong to feature library partitions. Seeing a
        # partition name is an indicator that we've entered a list of feature
        # partitions. After these, a single .part.end section will follow to
        # reserve memory at runtime. Seeing the .part.end section also marks the
        # end of partition sections in the map file.
        if tok.endswith('_partition'):
          in_partitions = True
        elif tok == '.part.end':
          # Note that we want to retain .part.end section, so it's fine to
          # restart processing on this section, rather than the next one.
          in_partitions = False

        if in_partitions:
          # For now, completely ignore feature partitions.
          cur_section = None
          cur_section_is_useful = False
        else:
          if not tok.startswith('PROVIDE_HIDDEN'):
            self._section_ranges[tok] = (address, size)
          cur_section = tok
          # E.g., Want to convert "(.text._name)" -> "_name" later.
          mangled_start_idx = len(cur_section) + 2
          cur_section_is_useful = (
              cur_section in models.BSS_SECTIONS
              or cur_section in (models.SECTION_RODATA, models.SECTION_TEXT)
              or cur_section.startswith(models.SECTION_DATA))

      elif cur_section_is_useful:
        # Level 2 data match the "In" column. They specify object paths and
        # section names within objects, or '<internal>:...'.
        if level == 2:
          # E.g., 'path.o:(.text._name)' => ['path.o', '(.text._name)'].
          # Split on the last ':' only, so that an object path which itself
          # contains ':' does not break the two-way unpacking.
          cur_obj, paren_value = tok.rsplit(':', 1)
          # E.g., '(.text._name)' -> '_name'.
          mangled_name = paren_value[mangled_start_idx:-1]
          # Derive flags from every Level 2 line, including CFI jump tables.
          # Otherwise a jump table would reuse the flags of the previous
          # Level 2 line, or hit an unassigned |cur_flags| if it came first.
          cur_flags = _FlagsFromMangledName(mangled_name)

          in_jump_table = '.L.cfi.jumptable' in paren_value
          if in_jump_table:
            # Store each CFI jump table as a Level 2 symbol, whose Level 3
            # details are discarded.
            jump_tables_count += 1
            cur_obj = ''  # Replaces 'lto.tmp' to prevent problem later.
            mangled_name = '** CFI jump table'
          else:
            is_partial = True
            # As of 2017/11 LLD does not distinguish merged strings from other
            # merged data. Feature request is filed under:
            # https://bugs.llvm.org/show_bug.cgi?id=35248
            if cur_obj == '<internal>':
              if cur_section == '.rodata' and mangled_name == '':
                # Treat all <internal> sections within .rodata as string
                # literals. Some may hold numeric constants or other data, but
                # there is currently no way to distinguish them.
                mangled_name = '** lld merge strings'
              else:
                # e.g. <internal>:(.text.thunk)
                mangled_name = '** ' + mangled_name

              is_partial = False
              cur_obj = None
            elif cur_obj == 'lto.tmp' or 'thinlto-cache' in cur_obj:
              thin_map[address] = os.path.basename(cur_obj)
              cur_obj = None

          # Create a symbol here since there may be no ensuing Level 3 lines.
          # But if there are, then the symbol can be modified later as sym[-1].
          sym = models.Symbol(cur_section, size, address=address,
                              full_name=mangled_name, object_path=cur_obj,
                              flags=cur_flags)
          syms.append(sym)

          # Level 3 |address| is nested under Level 2, don't add |size|.
          next_usable_address = address

        # Level 3 data match the "Symbol" column. They specify symbol names or
        # special names such as '.L_MergeGlobals'. Annotations such as '$d',
        # '$t.42' also appear at Level 3, but they are consumed by |tokenizer|,
        # so don't appear here.
        elif level == 3:
          # Handle .L.cfi.jumptable.
          if in_jump_table:
            # Level 3 entries in CFI jump tables are thunks with mangled names.
            # Extracting them as symbols is not worthwhile; we only store the
            # Level 2 symbol, and print the count for verbose output. For
            # counting, '__typeid_' entries are excluded since they're likely
            # just annotations.
            if not tok.startswith('__typeid_'):
              jump_entries_count += 1
            continue

          # Ignore anything with '.L_MergedGlobals' prefix. This seems to only
          # happen for ARM (32-bit) builds.
          if tok.startswith('.L_MergedGlobals'):
            continue

          # Use |span| to decide whether to use a Level 3 line for Symbols. This
          # is useful for two purposes:
          # * This is a better indicator than |size|, which can be 0 for
          #   assembly functions.
          # * If multiple Level 3 lines have the same starting address, this
          #   cause all but the last line to have |span > 0|. This dedups lines
          #   with identical symbol names (why do they exist?). Note that this
          #   also skips legitimate aliases, but that's desired because nm.py
          #   (downstream) assumes no aliases already exist.
          if span > 0:
            stripped_tok = demangle.StripLlvmPromotedGlobalNames(tok)
            if len(tok) != len(stripped_tok):
              promoted_name_count += 1
              tok = stripped_tok
            tok = _NormalizeName(tok)

            # Handle special case where a partial symbol consumes bytes before
            # the first Level 3 symbol.
            if is_partial and syms[-1].address < address:
              # Truncate the partial symbol and leave it without |full_name|.
              # The data from the current line will form a new symbol.
              syms[-1].size = address - syms[-1].address
              next_usable_address = address
              is_partial = False

            if is_partial:
              syms[-1].full_name = tok
              syms[-1].size = size if size > 0 else min(syms[-1].size, span)
              next_usable_address = address + syms[-1].size
              is_partial = False
            elif address >= next_usable_address:
              if tok.startswith('__typeid_'):
                assert size == 1
                if tok.endswith('_byte_array'):
                  # CFI byte array table: |size| is inaccurate, so use |span|.
                  size_to_use = span
                else:
                  # Likely '_global_addr' or '_unique_member'. These should be:
                  # * Skipped since they're in CFI tables.
                  # * Suppressed (via |next_usable_address|) by another Level 3
                  #   symbol.
                  # Anything that makes it here would be an anomaly worthy of
                  # investigation, so print warnings.
                  logging.warning('Unrecognized __typeid_ symbol at %08X',
                                  address)
                  continue
              else:
                # Prefer |size|, and only fall back to |span| if |size == 0|.
                size_to_use = size if size > 0 else span
              sym = models.Symbol(cur_section, size_to_use, address=address,
                                  full_name=tok, flags=cur_flags)
              syms.append(sym)

              # Suppress symbols with overlapping |address|. This eliminates
              # labels from assembly sources.
              next_usable_address = address + size_to_use
              if cur_obj is not None:
                syms[-1].object_path = cur_obj

        else:
          logging.error('Problem line: %r', line)

    if promoted_name_count:
      logging.info('Found %d promoted global names', promoted_name_count)
    if jump_tables_count:
      logging.info('Found %d CFI jump tables with %d total entries',
                   jump_tables_count, jump_entries_count)
    return self._section_ranges, syms, {'thin_map': thin_map}
Example #2
0
  def Parse(self, lines):
    """Parses a linker map file.

    Each line is matched against the regex selected by the map file version
    embedded in |self._linker_name|; lines that do not match are skipped. The
    width of the indent captured by the regex tells the column apart: 0 for
    "Out" (sections), 8 for "In" (object path and section), and 16 for
    "Symbol".

    Args:
      lines: Iterable of lines, the first of which has been consumed to
        identify file type.

    Returns:
      A tuple of (section_sizes, symbols), where section_sizes is
      |self._section_sizes| (section name -> size) and symbols is a list of
      models.Symbol.
    """
    # Newest format:
    #     VMA      LMA     Size Align Out     In      Symbol
    #     194      194       13     1 .interp
    #     194      194       13     1         <internal>:(.interp)
    #     1a8      1a8     22d8     4 .ARM.exidx
    #     1b0      1b0        8     4         obj/sandbox/syscall.o:(.ARM.exidx)
    #     400      400   123400    64 .text
    #     600      600       14     4         obj/...:(.text.OUTLINED_FUNCTION_0)
    #     600      600        0     1                 $x.3
    #     600      600       14     1                 OUTLINED_FUNCTION_0
    #  123800   123800    20000   256 .rodata
    #  123800   123800       4      4         ...:o:(.rodata._ZN3fooE.llvm.1234)
    #  123800   123800       4      1                 foo (.llvm.1234)
    #  123804   123804       4      4         ...:o:(.rodata.bar.llvm.1234)
    #  123804   123804       4      1                 bar.llvm.1234
    # Older format:
    # Address          Size             Align Out     In      Symbol
    # 00000000002002a8 000000000000001c     1 .interp
    # 00000000002002a8 000000000000001c     1         <internal>:(.interp)
    # ...
    # 0000000000201000 0000000000000202    16 .text
    # 0000000000201000 000000000000002a     1         /[...]/crt1.o:(.text)
    # 0000000000201000 0000000000000000     0                 _start
    # 000000000020102a 0000000000000000     1         /[...]/crti.o:(.text)
    # 0000000000201030 00000000000000bd    16         /[...]/crtbegin.o:(.text)
    # 0000000000201030 0000000000000000     0                 deregister_tm_clones
    # 0000000000201060 0000000000000000     0                 register_tm_clones
    # 00000000002010a0 0000000000000000     0                 __do_global_dtors_aux
    # 00000000002010c0 0000000000000000     0                 frame_dummy
    # 00000000002010ed 0000000000000071     1         a.o:(.text)
    # 00000000002010ed 0000000000000071     0                 main
    # Extract e.g., 'lld_v0' -> 0, or 'lld-lto_v1' -> 1.
    map_file_version = int(self._linker_name.split('_v')[1])
    pattern = MapFileParserLld._LINE_RE[map_file_version]

    syms = []
    cur_section = None
    cur_section_is_useful = None
    promoted_name_count = 0

    for line in lines:
      m = pattern.match(line)
      if m is None:
        continue
      address = int(m.group(1), 16)
      size = int(m.group(2), 16)
      indent_size = len(m.group(4))
      tok = m.group(5)

      # No indent: "Out" column, i.e., a section or PROVIDE_HIDDEN line.
      if indent_size == 0:
        if not tok.startswith('PROVIDE_HIDDEN'):
          self._section_sizes[tok] = size
        cur_section = tok
        # E.g., Want to convert "(.text._name)" -> "_name" later.
        mangled_start_idx = len(cur_section) + 2
        cur_section_is_useful = (
            cur_section in (models.SECTION_BSS,
                            models.SECTION_RODATA,
                            models.SECTION_TEXT) or
            cur_section.startswith(models.SECTION_DATA))

      elif cur_section_is_useful:
        # 8-space indent: "In" column, i.e., object path and section name.
        if indent_size == 8:
          # Create preliminary Symbol, which can be modified as sym[-1].
          syms.append(models.Symbol(cur_section, size, address=address))
          # E.g. path.o:(.text._name)
          # Split on the last ':' only, so that an object path which itself
          # contains ':' does not break the two-way unpacking.
          cur_obj, paren_value = tok.rsplit(':', 1)
          # "(.text._name)" -> "_name".
          mangled_name = paren_value[mangled_start_idx:-1]
          # As of 2017/11 LLD does not distinguish merged strings from other
          # merged data. Feature request is filed under:
          # https://bugs.llvm.org/show_bug.cgi?id=35248
          if cur_obj == '<internal>':
            if cur_section == '.rodata' and mangled_name == '':
              # Treat all <internal> sections within .rodata as string
              # literals. Some may hold numeric constants or other data, but
              # there is currently no way to distinguish them.
              syms[-1].full_name = '** lld merge strings'
            else:
              # e.g. <internal>:(.text.thunk)
              syms[-1].full_name = '** ' + mangled_name
          elif cur_obj == 'lto.tmp' or 'thinlto-cache' in cur_obj:
            # LTO temporaries: leave |object_path| unset.
            pass
          else:
            syms[-1].object_path = cur_obj

        # 16-space indent: "Symbol" column.
        elif indent_size == 16:
          # Ignore anything with '.L_MergedGlobals' prefix. This seems to only
          # happen for ARM (32-bit) builds.
          if tok.startswith('.L_MergedGlobals'):
            continue
          # If multiple entries exist, take the first one that reports a size.
          # Zero-length symbols look like "$t.4", "$d.5".
          if size and not syms[-1].full_name:
            # Outlined functions have names like OUTLINED_FUNCTION_0, which can
            # appear 1000+ times and can cause false aliasing. We treat these
            # as special cases by designating them as placeholder symbols and
            # renaming them to '** outlined function'.
            if tok.startswith('OUTLINED_FUNCTION_'):
              tok = '** outlined function'
            stripped_tok = demangle.StripLlvmPromotedGlobalNames(tok)
            if len(tok) != len(stripped_tok):
              promoted_name_count += 1
              tok = stripped_tok
            syms[-1].full_name = tok
        else:
          logging.error('Problem line: %r', line)

    if promoted_name_count:
      logging.info('Found %d promoted global names', promoted_name_count)
    return self._section_sizes, syms
Example #3
0
  def Parse(self, lines):
    """Parses a linker map file.

    Walks the records produced by |self.Tokenize(lines)|. Level 1 records
    (sections) are stored in |self._section_sizes|; Level 2 and Level 3
    records inside "useful" sections (.bss, .rodata, .text, and anything
    starting with the data section prefix) are turned into models.Symbol
    instances.

    Args:
      lines: Iterable of lines, the first of which has been consumed to
        identify file type.

    Returns:
      A tuple of (section_sizes, symbols), where section_sizes is
      |self._section_sizes| (section name -> size) and symbols is a list of
      models.Symbol.
    """
    # Newest format:
    #     VMA      LMA     Size Align Out     In      Symbol
    #     194      194       13     1 .interp
    #     194      194       13     1         <internal>:(.interp)
    #     1a8      1a8     22d8     4 .ARM.exidx
    #     1b0      1b0        8     4         obj/sandbox/syscall.o:(.ARM.exidx)
    #     400      400   123400    64 .text
    #     600      600       14     4         obj/...:(.text.OUTLINED_FUNCTION_0)
    #     600      600        0     1                 $x.3
    #     600      600       14     1                 OUTLINED_FUNCTION_0
    #  123800   123800    20000   256 .rodata
    #  123800   123800       4      4         ...:o:(.rodata._ZN3fooE.llvm.1234)
    #  123800   123800       4      1                 foo (.llvm.1234)
    #  123804   123804       4      4         ...:o:(.rodata.bar.llvm.1234)
    #  123804   123804       4      1                 bar.llvm.1234
    # Older format:
    # Address          Size             Align Out     In      Symbol
    # 00000000002002a8 000000000000001c     1 .interp
    # 00000000002002a8 000000000000001c     1         <internal>:(.interp)
    # ...
    # 0000000000201000 0000000000000202    16 .text
    # 0000000000201000 000000000000002a     1         /[...]/crt1.o:(.text)
    # 0000000000201000 0000000000000000     0                 _start
    # 000000000020102a 0000000000000000     1         /[...]/crti.o:(.text)
    # 0000000000201030 00000000000000bd    16         /[...]/crtbegin.o:(.text)
    # 0000000000201030 0000000000000000     0                 deregister_tm_clones
    # 0000000000201060 0000000000000000     0                 register_tm_clones
    # 00000000002010a0 0000000000000000     0                 __do_global_dtors_aux
    # 00000000002010c0 0000000000000000     0                 frame_dummy
    # 00000000002010ed 0000000000000071     1         a.o:(.text)
    # 00000000002010ed 0000000000000071     0                 main
    syms = []
    cur_section = None
    cur_section_is_useful = None
    promoted_name_count = 0
    # A Level 2 line does not supply |full_name| data (unless '<internal>').
    # This would be taken from a Level 3 line. |is_partial| indicates that an
    # eligible Level 3 line should be used to update |syms[-1].full_name|
    # instead of creating a new symbol.
    is_partial = False
    # Assembly code can create consecutive Level 3 lines with |size == 0|. These
    # lines can represent
    #  (1) assembly functions (should form symbol), or
    #  (2) assembly labels (should NOT form symbol).
    # It seems (2) correlates with the presence of a leading Level 3 line with
    # |size > 0|. This gives rise to the following strategy: Each symbol S from
    # a Level 3 line suppresses Level 3 lines with |address| less than
    # |next_usable_address := S.address + S.size|.
    next_usable_address = 0

    tokenizer = self.Tokenize(lines)
    for (line, address, size, level, span, tok) in tokenizer:
      # Level 1 data match the "Out" column. They specify sections or
      # PROVIDE_HIDDEN lines.
      if level == 1:
        if not tok.startswith('PROVIDE_HIDDEN'):
          self._section_sizes[tok] = size
        cur_section = tok
        # E.g., Want to convert "(.text._name)" -> "_name" later.
        mangled_start_idx = len(cur_section) + 2
        cur_section_is_useful = (
            cur_section in (models.SECTION_BSS,
                            models.SECTION_RODATA,
                            models.SECTION_TEXT) or
            cur_section.startswith(models.SECTION_DATA))

      elif cur_section_is_useful:
        # Level 2 data match the "In" column. They specify object paths and
        # section names within objects, or '<internal>:...'.
        if level == 2:
          # Create a symbol here since there may be no ensuing Level 3 lines.
          # But if there are, then the symbol can be modified later as sym[-1].
          syms.append(models.Symbol(cur_section, size, address=address))
          # E.g., 'path.o:(.text._name)' => ['path.o', '(.text._name)'].
          # Split on the last ':' only, so that an object path which itself
          # contains ':' does not break the two-way unpacking.
          cur_obj, paren_value = tok.rsplit(':', 1)
          # E.g., '(.text._name)' -> '_name'.
          mangled_name = paren_value[mangled_start_idx:-1]
          # As of 2017/11 LLD does not distinguish merged strings from other
          # merged data. Feature request is filed under:
          # https://bugs.llvm.org/show_bug.cgi?id=35248
          if cur_obj == '<internal>':
            if cur_section == '.rodata' and mangled_name == '':
              # Treat all <internal> sections within .rodata as string
              # literals. Some may hold numeric constants or other data, but
              # there is currently no way to distinguish them.
              syms[-1].full_name = '** lld merge strings'
            else:
              # e.g. <internal>:(.text.thunk)
              syms[-1].full_name = '** ' + mangled_name
            cur_obj = None
          elif cur_obj == 'lto.tmp' or 'thinlto-cache' in cur_obj:
            cur_obj = None
          if cur_obj is not None:
            syms[-1].object_path = cur_obj

          is_partial = not bool(syms[-1].full_name)
          # Level 3 |address| is nested under Level 2, don't add |size|.
          next_usable_address = address

        # Level 3 data match the "Symbol" column. They specify symbol names or
        # special names such as '.L_MergeGlobals'. Annotations such as '$d',
        # '$t.42' also appear at Level 3, but they are consumed by |tokenizer|,
        # so don't appear here.
        elif level == 3:
          # Ignore anything with '.L_MergedGlobals' prefix. This seems to only
          # happen for ARM (32-bit) builds.
          if tok.startswith('.L_MergedGlobals'):
            continue

          # Use |span| to decide whether to use a Level 3 line for Symbols. This
          # is useful for two purposes:
          # * This is a better indicator than |size|, which can be 0 for
          #   assembly functions.
          # * If multiple Level 3 lines have the same starting address, this
          #   cause all but the last line to have |span > 0|. This dedups lines
          #   with identical symbol names (why do they exist?). Note that this
          #   also skips legitimate aliases, but that's desired because nm.py
          #   (downstream) assumes no aliases already exist.
          if span > 0:
            # Outlined functions have names like OUTLINED_FUNCTION_0, which can
            # appear 1000+ times, and can cause false aliasing. We treat these
            # as special cases by designating them as placeholder symbols and
            # renaming them to '** outlined function'.
            if tok.startswith('OUTLINED_FUNCTION_'):
              tok = '** outlined function'
            stripped_tok = demangle.StripLlvmPromotedGlobalNames(tok)
            if len(tok) != len(stripped_tok):
              promoted_name_count += 1
              tok = stripped_tok

            # Handle special case where a partial symbol consumes bytes before
            # the first Level 3 symbol.
            if is_partial and syms[-1].address < address:
              # Truncate the partial symbol and leave it without |full_name|.
              # The data from the current line will form a new symbol.
              syms[-1].size = address - syms[-1].address
              next_usable_address = address
              is_partial = False

            if is_partial:
              syms[-1].full_name = tok
              syms[-1].size = size if size > 0 else min(syms[-1].size, span)
              next_usable_address = address + syms[-1].size
              is_partial = False
            elif address >= next_usable_address:
              # Prefer |size|, and only fall back to |span| if |size == 0|.
              size_to_use = size if size > 0 else span
              syms.append(
                  models.Symbol(
                      cur_section, size_to_use, address=address, full_name=tok))
              # Suppress symbols with overlapping |address|. This eliminates
              # labels from assembly sources.
              next_usable_address = address + size_to_use
              if cur_obj is not None:
                syms[-1].object_path = cur_obj

        else:
          logging.error('Problem line: %r', line)

    if promoted_name_count:
      logging.info('Found %d promoted global names', promoted_name_count)
    return self._section_sizes, syms
Example #4
0
    def Parse(self, lines):
        """Parses a linker map file.

        Walks the records produced by |self.Tokenize(lines)|. Level 1 records
        (sections) are stored in |self._section_sizes|; each Level 2 record
        inside a "useful" section (.bss, .rodata, .text, and anything starting
        with the data section prefix) becomes a models.Symbol, which the first
        Level 3 record with non-zero size may then name.

        Args:
          lines: Iterable of lines, the first of which has been consumed to
            identify file type.

        Returns:
          A tuple of (section_sizes, symbols), where section_sizes is
          |self._section_sizes| (section name -> size) and symbols is a list
          of models.Symbol.
        """
        # Newest format:
        #     VMA      LMA     Size Align Out     In      Symbol
        #     194      194       13     1 .interp
        #     194      194       13     1         <internal>:(.interp)
        #     1a8      1a8     22d8     4 .ARM.exidx
        #     1b0      1b0        8     4         obj/sandbox/syscall.o:(.ARM.exidx)
        #     400      400   123400    64 .text
        #     600      600       14     4         obj/...:(.text.OUTLINED_FUNCTION_0)
        #     600      600        0     1                 $x.3
        #     600      600       14     1                 OUTLINED_FUNCTION_0
        #  123800   123800    20000   256 .rodata
        #  123800   123800       4      4         ...:o:(.rodata._ZN3fooE.llvm.1234)
        #  123800   123800       4      1                 foo (.llvm.1234)
        #  123804   123804       4      4         ...:o:(.rodata.bar.llvm.1234)
        #  123804   123804       4      1                 bar.llvm.1234
        # Older format:
        # Address          Size             Align Out     In      Symbol
        # 00000000002002a8 000000000000001c     1 .interp
        # 00000000002002a8 000000000000001c     1         <internal>:(.interp)
        # ...
        # 0000000000201000 0000000000000202    16 .text
        # 0000000000201000 000000000000002a     1         /[...]/crt1.o:(.text)
        # 0000000000201000 0000000000000000     0                 _start
        # 000000000020102a 0000000000000000     1         /[...]/crti.o:(.text)
        # 0000000000201030 00000000000000bd    16         /[...]/crtbegin.o:(.text)
        # 0000000000201030 0000000000000000     0                 deregister_tm_clones
        # 0000000000201060 0000000000000000     0                 register_tm_clones
        # 00000000002010a0 0000000000000000     0                 __do_global_dtors_aux
        # 00000000002010c0 0000000000000000     0                 frame_dummy
        # 00000000002010ed 0000000000000071     1         a.o:(.text)
        # 00000000002010ed 0000000000000071     0                 main
        syms = []
        cur_section = None
        cur_section_is_useful = None
        promoted_name_count = 0

        tokenizer = self.Tokenize(lines)
        # TODO(huangs): Use |span| from |tokenizer| to fix http://crbug.com/892648.
        for (line, address, size, level, _, tok) in tokenizer:
            # Level 1 data match the "Out" column. They specify sections or
            # PROVIDE_HIDDEN lines.
            if level == 1:
                if not tok.startswith('PROVIDE_HIDDEN'):
                    self._section_sizes[tok] = size
                cur_section = tok
                # E.g., Want to convert "(.text._name)" -> "_name" later.
                mangled_start_idx = len(cur_section) + 2
                cur_section_is_useful = (
                    cur_section in (models.SECTION_BSS, models.SECTION_RODATA,
                                    models.SECTION_TEXT)
                    or cur_section.startswith(models.SECTION_DATA))

            elif cur_section_is_useful:
                # Level 2 data match the "In" column. They specify object paths and
                # section names within objects, or '<internal>:...'.
                if level == 2:
                    # Create symbol, which can be modified as sym[-1] by Level 3 parsing.
                    syms.append(
                        models.Symbol(cur_section, size, address=address))
                    # E.g. path.o:(.text._name)
                    # Split on the last ':' only, so that an object path which
                    # itself contains ':' does not break the two-way unpacking.
                    cur_obj, paren_value = tok.rsplit(':', 1)
                    # '(.text._name)' -> '_name'.
                    mangled_name = paren_value[mangled_start_idx:-1]
                    # As of 2017/11 LLD does not distinguish merged strings from other
                    # merged data. Feature request is filed under:
                    # https://bugs.llvm.org/show_bug.cgi?id=35248
                    if cur_obj == '<internal>':
                        if cur_section == '.rodata' and mangled_name == '':
                            # Treat all <internal> sections within .rodata as string
                            # literals. Some may hold numeric constants or other data, but
                            # there is currently no way to distinguish them.
                            syms[-1].full_name = '** lld merge strings'
                        else:
                            # e.g. <internal>:(.text.thunk)
                            syms[-1].full_name = '** ' + mangled_name
                    elif cur_obj == 'lto.tmp' or 'thinlto-cache' in cur_obj:
                        # LTO temporaries: leave |object_path| unset.
                        pass
                    else:
                        syms[-1].object_path = cur_obj

                # Level 3 data match the "Symbol" column. They specify symbol names or
                # special names such as '.L_MergeGlobals'. Annotations such as '$d',
                # '$t.42' also appear at Level 3, but they are consumed by |tokenizer|,
                # so don't appear here.
                elif level == 3:
                    # Ignore anything with '.L_MergedGlobals' prefix. This seems to only
                    # happen for ARM (32-bit) builds.
                    if tok.startswith('.L_MergedGlobals'):
                        continue
                    # Multiple Level 3 entries may exist. Take the first with |size != 0|.
                    # TODO(huangs): Process all entries to fix http://crbug.com/892648.
                    if size and not syms[-1].full_name:
                        # Outlined functions have names like OUTLINED_FUNCTION_0, which can
                        # appear 1000+ times, and can cause false aliasing. We treat these
                        # as special cases by designating them as placeholder symbols and
                        # renaming them to '** outlined function'.
                        if tok.startswith('OUTLINED_FUNCTION_'):
                            tok = '** outlined function'
                        stripped_tok = demangle.StripLlvmPromotedGlobalNames(
                            tok)
                        if len(tok) != len(stripped_tok):
                            promoted_name_count += 1
                            tok = stripped_tok
                        syms[-1].full_name = tok
                else:
                    logging.error('Problem line: %r', line)

        if promoted_name_count:
            logging.info('Found %d promoted global names', promoted_name_count)
        return self._section_sizes, syms