Python StripLlvmPromotedGlobalNames Examples

Programming Language: Python

Namespace/Package Name: demangle

Method/Function: StripLlvmPromotedGlobalNames

Examples at hotexamples.com: 4

Python StripLlvmPromotedGlobalNames - 4 examples found. These are the top-rated real-world Python examples of demangle.StripLlvmPromotedGlobalNames, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

  def Parse(self, lines):
    """Parses a linker map file.

    Walks the (line, address, size, level, span, tok) tuples produced by
    |self.Tokenize(lines)|. Level 1 lines select the current section, Level 2
    lines create one symbol per input section, and Level 3 lines either name the
    pending Level 2 symbol or create additional symbols.

    Args:
      lines: Iterable of lines, the first of which has been consumed to
        identify file type.

    Returns:
      A tuple of (section_ranges, symbols, extras), where:
      * |section_ranges| is |self._section_ranges|, updated with an
        (address, size) entry for each Level 1 token that is outside feature
        partitions and does not start with 'PROVIDE_HIDDEN'.
      * |symbols| is the list of models.Symbol instances created.
      * |extras| is the dict {'thin_map': thin_map}.
    """
    # Newest format:
    #     VMA      LMA     Size Align Out     In      Symbol
    #     194      194       13     1 .interp
    #     194      194       13     1         <internal>:(.interp)
    #     1a8      1a8     22d8     4 .ARM.exidx
    #     1b0      1b0        8     4         obj/sandbox/syscall.o:(.ARM.exidx)
    #     400      400   123400    64 .text
    #     600      600       14     4         ...:(.text.OUTLINED_FUNCTION_0)
    #     600      600        0     1                 $x.3
    #     600      600       14     1                 OUTLINED_FUNCTION_0
    #  123800   123800    20000   256 .rodata
    #  123800   123800       4      4         ...:o:(.rodata._ZN3fooE.llvm.1234)
    #  123800   123800       4      1                 foo (.llvm.1234)
    #  123804   123804       4      4         ...:o:(.rodata.bar.llvm.1234)
    #  123804   123804       4      1                 bar.llvm.1234
    # Older format:
    # Address          Size             Align Out     In      Symbol
    # 00000000002002a8 000000000000001c     1 .interp
    # 00000000002002a8 000000000000001c     1         <internal>:(.interp)
    # ...
    # 0000000000201000 0000000000000202    16 .text
    # 0000000000201000 000000000000002a     1         /[...]/crt1.o:(.text)
    # 0000000000201000 0000000000000000     0                 _start
    # 000000000020102a 0000000000000000     1         /[...]/crti.o:(.text)
    # 0000000000201030 00000000000000bd    16         /[...]/crtbegin.o:(.text)
    # 0000000000201030 0000000000000000     0             deregister_tm_clones
    # 0000000000201060 0000000000000000     0             register_tm_clones
    # 00000000002010a0 0000000000000000     0             __do_global_dtors_aux
    # 00000000002010c0 0000000000000000     0             frame_dummy
    # 00000000002010ed 0000000000000071     1         a.o:(.text)
    # 00000000002010ed 0000000000000071     0             main
    syms = []
    cur_section = None
    cur_section_is_useful = False
    promoted_name_count = 0
    # |is_partial| indicates that an eligible Level 3 line should be used to
    # update |syms[-1].full_name| instead of creating a new symbol.
    is_partial = False
    # Assembly code can create consecutive Level 3 lines with |size == 0|. These
    # lines can represent
    #  (1) assembly functions (should form symbol), or
    #  (2) assembly labels (should NOT form symbol).
    # It seems (2) correlates with the presence of a leading Level 3 line with
    # |size > 0|. This gives rise to the following strategy: Each symbol S from
    # a Level 3 line suppresses Level 3 lines with |address| less than
    # |next_usable_address := S.address + S.size|.
    next_usable_address = 0

    # For Thin-LTO, a map from each address to the Thin-LTO cache file. This
    # provides hints downstream to identify object_paths for .L.ref.tmp symbols,
    # but is not useful in the final output. Therefore it's stored separately,
    # instead of being in Symbol.
    thin_map = {}

    tokenizer = self.Tokenize(lines)

    in_partitions = False
    in_jump_table = False
    jump_tables_count = 0
    jump_entries_count = 0

    for (line, address, size, level, span, tok) in tokenizer:
      # Level 1 data match the "Out" column. They specify sections or
      # PROVIDE_HIDDEN lines.
      if level == 1:
        # Ignore sections that belong to feature library partitions. Seeing a
        # partition name is an indicator that we've entered a list of feature
        # partitions. After these, a single .part.end section will follow to
        # reserve memory at runtime. Seeing the .part.end section also marks the
        # end of partition sections in the map file.
        if tok.endswith('_partition'):
          in_partitions = True
        elif tok == '.part.end':
          # Note that we want to retain .part.end section, so it's fine to
          # restart processing on this section, rather than the next one.
          in_partitions = False

        if in_partitions:
          # For now, completely ignore feature partitions.
          cur_section = None
          cur_section_is_useful = False
        else:
          if not tok.startswith('PROVIDE_HIDDEN'):
            self._section_ranges[tok] = (address, size)
          cur_section = tok
          # E.g., Want to convert "(.text._name)" -> "_name" later.
          mangled_start_idx = len(cur_section) + 2
          cur_section_is_useful = (
              cur_section in models.BSS_SECTIONS
              or cur_section in (models.SECTION_RODATA, models.SECTION_TEXT)
              or cur_section.startswith(models.SECTION_DATA))

      elif cur_section_is_useful:
        # Level 2 data match the "In" column. They specify object paths and
        # section names within objects, or '<internal>:...'.
        if level == 2:
          # E.g., 'path.o:(.text._name)' => ['path.o', '(.text._name)'].
          cur_obj, paren_value = tok.split(':')

          in_jump_table = '.L.cfi.jumptable' in paren_value
          if in_jump_table:
            # Store each CFI jump table as a Level 2 symbol, whose Level 3
            # details are discarded.
            # NOTE(review): |cur_flags| and |is_partial| are not assigned on
            # this branch, so the symbol created below reuses |cur_flags| from
            # the previous Level 2 line (and raises if there was none) --
            # confirm this is intended.
            jump_tables_count += 1
            cur_obj = ''  # Replaces 'lto.tmp' to prevent problem later.
            mangled_name = '** CFI jump table'
          else:
            # E.g., '(.text.unlikely._name)' -> '_name'.
            mangled_name = paren_value[mangled_start_idx:-1]
            cur_flags = _FlagsFromMangledName(mangled_name)
            is_partial = True
            # As of 2017/11 LLD does not distinguish merged strings from other
            # merged data. Feature request is filed under:
            # https://bugs.llvm.org/show_bug.cgi?id=35248
            if cur_obj == '<internal>':
              if cur_section == '.rodata' and mangled_name == '':
                # Treat all <internal> sections within .rodata as string
                # literals. Some may hold numeric constants or other data, but
                # there is currently no way to distinguish them.
                mangled_name = '** lld merge strings'
              else:
                # e.g. <internal>:(.text.thunk)
                mangled_name = '** ' + mangled_name

              is_partial = False
              cur_obj = None
            elif cur_obj == 'lto.tmp' or 'thinlto-cache' in cur_obj:
              thin_map[address] = os.path.basename(cur_obj)
              cur_obj = None

          # Create a symbol here since there may be no ensuing Level 3 lines.
          # But if there are, then the symbol can be modified later as sym[-1].
          sym = models.Symbol(cur_section, size, address=address,
                              full_name=mangled_name, object_path=cur_obj,
                              flags=cur_flags)
          syms.append(sym)

          # Level 3 |address| is nested under Level 2, don't add |size|.
          next_usable_address = address

        # Level 3 data match the "Symbol" column. They specify symbol names or
        # special names such as '.L_MergeGlobals'. Annotations such as '$d',
        # '$t.42' also appear at Level 3, but they are consumed by |tokenizer|,
        # so don't appear here.
        elif level == 3:
          # Handle .L.cfi.jumptable.
          if in_jump_table:
            # Level 3 entries in CFI jump tables are thunks with mangled names.
            # Extracting them as symbols is not worthwhile; we only store the
            # Level 2 symbol, and print the count for verbose output. For
            # counting, '__typeid_' entries are excluded since they're likely
            # just annotations.
            if not tok.startswith('__typeid_'):
              jump_entries_count += 1
            continue

          # Ignore anything with '.L_MergedGlobals' prefix. This seems to only
          # happen for ARM (32-bit) builds.
          if tok.startswith('.L_MergedGlobals'):
            continue

          # Use |span| to decide whether to use a Level 3 line for Symbols. This
          # is useful for two purposes:
          # * This is a better indicator than |size|, which can be 0 for
          #   assembly functions.
          # * If multiple Level 3 lines have the same starting address, this
          #   causes all but the last line to have |span > 0|. This dedups lines
          #   with identical symbol names (why do they exist?). Note that this
          #   also skips legitimate aliases, but that's desired because nm.py
          #   (downstream) assumes no aliases already exist.
          if span > 0:
            stripped_tok = demangle.StripLlvmPromotedGlobalNames(tok)
            if len(tok) != len(stripped_tok):
              promoted_name_count += 1
              tok = stripped_tok
            tok = _NormalizeName(tok)

            # Handle special case where a partial symbol consumes bytes before
            # the first Level 3 symbol.
            if is_partial and syms[-1].address < address:
              # Truncate the partial symbol and leave it without |full_name|.
              # The data from the current line will form a new symbol.
              syms[-1].size = address - syms[-1].address
              next_usable_address = address
              is_partial = False

            if is_partial:
              syms[-1].full_name = tok
              syms[-1].size = size if size > 0 else min(syms[-1].size, span)
              next_usable_address = address + syms[-1].size
              is_partial = False
            elif address >= next_usable_address:
              if tok.startswith('__typeid_'):
                assert size == 1
                if tok.endswith('_byte_array'):
                  # CFI byte array table: |size| is inaccurate, so use |span|.
                  size_to_use = span
                else:
                  # Likely '_global_addr' or '_unique_member'. These should be:
                  # * Skipped since they're in CFI tables.
                  # * Suppressed (via |next_usable_address|) by another Level 3
                  #   symbol.
                  # Anything that makes it here would be an anomaly worthy of
                  # investigation, so print warnings.
                  # logging.warn() is a deprecated alias that newer Python
                  # versions no longer provide; use logging.warning().
                  logging.warning('Unrecognized __typeid_ symbol at %08X',
                                  address)
                  continue
              else:
                # Prefer |size|, and only fall back to |span| if |size == 0|.
                size_to_use = size if size > 0 else span
              sym = models.Symbol(cur_section, size_to_use, address=address,
                                  full_name=tok, flags=cur_flags)
              syms.append(sym)

              # Suppress symbols with overlapping |address|. This eliminates
              # labels from assembly sources.
              next_usable_address = address + size_to_use
              if cur_obj is not None:
                syms[-1].object_path = cur_obj

        else:
          logging.error('Problem line: %r', line)

    if promoted_name_count:
      logging.info('Found %d promoted global names', promoted_name_count)
    if jump_tables_count:
      logging.info('Found %d CFI jump tables with %d total entries',
                   jump_tables_count, jump_entries_count)
    return self._section_ranges, syms, {'thin_map': thin_map}

Example #2

Show file

File: linker_map_parser.py Project: ChenMiaoS/chromium

  def Parse(self, lines):
    """Parses the lines of a linker map file into sections and symbols.

    Each line is matched against the regex selected by the version suffix of
    |self._linker_name|; lines that do not match are skipped. The indentation
    of the matched token decides whether it names a section (0), an input
    object section (8) or a symbol (16).

    Args:
      lines: Iterable of lines, the first of which has been consumed to
        identify file type.

    Returns:
      A tuple of (section_sizes, symbols).
    """
    # Newest format:
    #     VMA      LMA     Size Align Out     In      Symbol
    #     194      194       13     1 .interp
    #     194      194       13     1         <internal>:(.interp)
    #     1a8      1a8     22d8     4 .ARM.exidx
    #     1b0      1b0        8     4         obj/sandbox/syscall.o:(.ARM.exidx)
    #     400      400   123400    64 .text
    #     600      600       14     4         obj/...:(.text.OUTLINED_FUNCTION_0)
    #     600      600        0     1                 $x.3
    #     600      600       14     1                 OUTLINED_FUNCTION_0
    #  123800   123800    20000   256 .rodata
    #  123800   123800       4      4         ...:o:(.rodata._ZN3fooE.llvm.1234)
    #  123800   123800       4      1                 foo (.llvm.1234)
    #  123804   123804       4      4         ...:o:(.rodata.bar.llvm.1234)
    #  123804   123804       4      1                 bar.llvm.1234
    # Older format:
    # Address          Size             Align Out     In      Symbol
    # 00000000002002a8 000000000000001c     1 .interp
    # 00000000002002a8 000000000000001c     1         <internal>:(.interp)
    # ...
    # 0000000000201000 0000000000000202    16 .text
    # 0000000000201000 000000000000002a     1         /[...]/crt1.o:(.text)
    # 0000000000201000 0000000000000000     0                 _start
    # 000000000020102a 0000000000000000     1         /[...]/crti.o:(.text)
    # 0000000000201030 00000000000000bd    16         /[...]/crtbegin.o:(.text)
    # 0000000000201030 0000000000000000     0                 deregister_tm_clones
    # 0000000000201060 0000000000000000     0                 register_tm_clones
    # 00000000002010a0 0000000000000000     0                 __do_global_dtors_aux
    # 00000000002010c0 0000000000000000     0                 frame_dummy
    # 00000000002010ed 0000000000000071     1         a.o:(.text)
    # 00000000002010ed 0000000000000071     0                 main

    # The linker name carries the map format version, e.g. 'lld_v0' -> 0 and
    # 'lld-lto_v1' -> 1.
    version = int(self._linker_name.split('_v')[1])
    line_re = MapFileParserLld._LINE_RE[version]

    symbols = []
    section = None
    section_is_useful = None
    num_promoted = 0

    for line in lines:
      match = line_re.match(line)
      if match is None:
        continue
      address = int(match.group(1), 16)
      size = int(match.group(2), 16)
      indent = len(match.group(4))
      tok = match.group(5)

      # No indentation: an output section (or a PROVIDE_HIDDEN line).
      if indent == 0:
        if not tok.startswith('PROVIDE_HIDDEN'):
          self._section_sizes[tok] = size
        section = tok
        # Offset used later to turn "(.text._name)" into "_name".
        name_offset = len(section) + 2
        section_is_useful = (
            section in (models.SECTION_BSS,
                        models.SECTION_RODATA,
                        models.SECTION_TEXT) or
            section.startswith(models.SECTION_DATA))
        continue

      # Everything nested under an uninteresting section is dropped.
      if not section_is_useful:
        continue

      # 8 spaces: an input section, e.g. 'path.o:(.text._name)'.
      if indent == 8:
        # Preliminary Symbol; 16-space lines may fill in its name later.
        sym = models.Symbol(section, size, address=address)
        symbols.append(sym)
        obj_path, paren_value = tok.split(':')
        # "(.text._name)" -> "_name".
        mangled_name = paren_value[name_offset:-1]
        if obj_path == '<internal>':
          # As of 2017/11 LLD does not distinguish merged strings from other
          # merged data. Feature request is filed under:
          # https://bugs.llvm.org/show_bug.cgi?id=35248
          # So every unnamed <internal> entry in .rodata is treated as string
          # literals, although some may hold numeric constants or other data.
          # Other entries look like <internal>:(.text.thunk).
          if section == '.rodata' and mangled_name == '':
            sym.full_name = '** lld merge strings'
          else:
            sym.full_name = '** ' + mangled_name
        elif obj_path != 'lto.tmp' and 'thinlto-cache' not in obj_path:
          sym.object_path = obj_path
        continue

      if indent != 16:
        logging.error('Problem line: %r', line)
        continue

      # 16 spaces: a symbol name.
      # Ignore anything with '.L_MergedGlobals' prefix. This seems to only
      # happen for ARM (32-bit) builds.
      if tok.startswith('.L_MergedGlobals'):
        continue
      # Only the first entry that reports a size names the symbol. Zero-length
      # entries look like "$t.4", "$d.5".
      if not size or symbols[-1].full_name:
        continue

      # Outlined functions have names like OUTLINED_FUNCTION_0, which can
      # appear 1000+ times and cause false aliasing. They are collapsed into
      # the placeholder name '** outlined function'.
      name = tok
      if name.startswith('OUTLINED_FUNCTION_'):
        name = '** outlined function'
      stripped = demangle.StripLlvmPromotedGlobalNames(name)
      if len(stripped) != len(name):
        num_promoted += 1
        name = stripped
      symbols[-1].full_name = name

    if num_promoted:
      logging.info('Found %d promoted global names', num_promoted)
    return self._section_sizes, symbols

Example #3

Show file

File: linker_map_parser.py Project: matisha001/chromium

  def Parse(self, lines):
    """Parses a linker map file.

    Walks the (line, address, size, level, span, tok) tuples produced by
    |self.Tokenize(lines)|. Level 1 lines select the current section and record
    its size, Level 2 lines create one symbol per input section, and Level 3
    lines either name the pending Level 2 symbol or create additional symbols.

    Args:
      lines: Iterable of lines, the first of which has been consumed to
        identify file type.

    Returns:
      A tuple of (section_sizes, symbols), where |section_sizes| is
      |self._section_sizes| updated with the size of each Level 1 token that
      does not start with 'PROVIDE_HIDDEN', and |symbols| is the list of
      models.Symbol instances created.
    """
    # Newest format:
    #     VMA      LMA     Size Align Out     In      Symbol
    #     194      194       13     1 .interp
    #     194      194       13     1         <internal>:(.interp)
    #     1a8      1a8     22d8     4 .ARM.exidx
    #     1b0      1b0        8     4         obj/sandbox/syscall.o:(.ARM.exidx)
    #     400      400   123400    64 .text
    #     600      600       14     4         obj/...:(.text.OUTLINED_FUNCTION_0)
    #     600      600        0     1                 $x.3
    #     600      600       14     1                 OUTLINED_FUNCTION_0
    #  123800   123800    20000   256 .rodata
    #  123800   123800       4      4         ...:o:(.rodata._ZN3fooE.llvm.1234)
    #  123800   123800       4      1                 foo (.llvm.1234)
    #  123804   123804       4      4         ...:o:(.rodata.bar.llvm.1234)
    #  123804   123804       4      1                 bar.llvm.1234
    # Older format:
    # Address          Size             Align Out     In      Symbol
    # 00000000002002a8 000000000000001c     1 .interp
    # 00000000002002a8 000000000000001c     1         <internal>:(.interp)
    # ...
    # 0000000000201000 0000000000000202    16 .text
    # 0000000000201000 000000000000002a     1         /[...]/crt1.o:(.text)
    # 0000000000201000 0000000000000000     0                 _start
    # 000000000020102a 0000000000000000     1         /[...]/crti.o:(.text)
    # 0000000000201030 00000000000000bd    16         /[...]/crtbegin.o:(.text)
    # 0000000000201030 0000000000000000     0                 deregister_tm_clones
    # 0000000000201060 0000000000000000     0                 register_tm_clones
    # 00000000002010a0 0000000000000000     0                 __do_global_dtors_aux
    # 00000000002010c0 0000000000000000     0                 frame_dummy
    # 00000000002010ed 0000000000000071     1         a.o:(.text)
    # 00000000002010ed 0000000000000071     0                 main
    syms = []
    cur_section = None
    cur_section_is_useful = None
    promoted_name_count = 0
    # A Level 2 line does not supply |full_name| data (unless '<internal>').
    # This would be taken from a Level 3 line. |is_partial| indicates that an
    # eligible Level 3 line should be used to update |syms[-1].full_name|
    # instead of creating a new symbol.
    is_partial = False
    # Assembly code can create consecutive Level 3 lines with |size == 0|. These
    # lines can represent
    #  (1) assembly functions (should form symbol), or
    #  (2) assembly labels (should NOT form symbol).
    # It seems (2) correlates with the presence of a leading Level 3 line with
    # |size > 0|. This gives rise to the following strategy: Each symbol S from
    # a Level 3 line suppresses Level 3 lines with |address| less than
    # |next_usable_address := S.address + S.size|.
    next_usable_address = 0

    tokenizer = self.Tokenize(lines)
    for (line, address, size, level, span, tok) in tokenizer:
      # Level 1 data match the "Out" column. They specify sections or
      # PROVIDE_HIDDEN lines.
      if level == 1:
        if not tok.startswith('PROVIDE_HIDDEN'):
          self._section_sizes[tok] = size
        cur_section = tok
        # E.g., Want to convert "(.text._name)" -> "_name" later.
        mangled_start_idx = len(cur_section) + 2
        # Only these sections produce symbols; Level 2 / Level 3 lines under
        # any other section are skipped by the |elif| below.
        cur_section_is_useful = (
            cur_section in (models.SECTION_BSS,
                            models.SECTION_RODATA,
                            models.SECTION_TEXT) or
            cur_section.startswith(models.SECTION_DATA))

      elif cur_section_is_useful:
        # Level 2 data match the "In" column. They specify object paths and
        # section names within objects, or '<internal>:...'.
        if level == 2:
          # Create a symbol here since there may be no ensuing Level 3 lines.
          # But if there are, then the symbol can be modified later as sym[-1].
          syms.append(models.Symbol(cur_section, size, address=address))
          # E.g., 'path.o:(.text._name)' => ['path.o', '(.text._name)'].
          # NOTE(review): this unpacking raises ValueError unless |tok| contains
          # exactly one ':' -- confirm the tokenizer guarantees that.
          cur_obj, paren_value = tok.split(':')
          # E.g., '(.text._name)' -> '_name'.
          mangled_name = paren_value[mangled_start_idx:-1]
          # As of 2017/11 LLD does not distinguish merged strings from other
          # merged data. Feature request is filed under:
          # https://bugs.llvm.org/show_bug.cgi?id=35248
          if cur_obj == '<internal>':
            if cur_section == '.rodata' and mangled_name == '':
              # Treat all <internal> sections within .rodata as string
              # literals. Some may hold numeric constants or other data, but
              # there is currently no way to distinguish them.
              syms[-1].full_name = '** lld merge strings'
            else:
              # e.g. <internal>:(.text.thunk)
              syms[-1].full_name = '** ' + mangled_name
            cur_obj = None
          elif cur_obj == 'lto.tmp' or 'thinlto-cache' in cur_obj:
            # These object names are not recorded as |object_path|.
            cur_obj = None
          if cur_obj is not None:
            syms[-1].object_path = cur_obj

          # Only a symbol that is still unnamed waits for a Level 3 line to
          # supply its |full_name|.
          is_partial = not bool(syms[-1].full_name)
          # Level 3 |address| is nested under Level 2, don't add |size|.
          next_usable_address = address

        # Level 3 data match the "Symbol" column. They specify symbol names or
        # special names such as '.L_MergeGlobals'. Annotations such as '$d',
        # '$t.42' also appear at Level 3, but they are consumed by |tokenizer|,
        # so don't appear here.
        elif level == 3:
          # Ignore anything with '.L_MergedGlobals' prefix. This seems to only
          # happen for ARM (32-bit) builds.
          if tok.startswith('.L_MergedGlobals'):
            continue

          # Use |span| to decide whether to use a Level 3 line for Symbols. This
          # is useful for two purposes:
          # * This is a better indicator than |size|, which can be 0 for
          #   assembly functions.
          # * If multiple Level 3 lines have the same starting address, this
          #   causes all but the last line to have |span > 0|. This dedups lines
          #   with identical symbol names (why do they exist?). Note that this
          #   also skips legitimate aliases, but that's desired because nm.py
          #   (downstream) assumes no aliases already exist.
          if span > 0:
            # Outlined functions have names like OUTLINED_FUNCTION_0, which can
            # appear 1000+ times, and can cause false aliasing. We treat these
            # as special cases by designating them as placeholder symbols and
            # renaming them to '** outlined function'.
            if tok.startswith('OUTLINED_FUNCTION_'):
              tok = '** outlined function'
            # A change in length means a promoted-global suffix was stripped.
            stripped_tok = demangle.StripLlvmPromotedGlobalNames(tok)
            if len(tok) != len(stripped_tok):
              promoted_name_count += 1
              tok = stripped_tok

            # Handle special case where a partial symbol consumes bytes before
            # the first Level 3 symbol.
            if is_partial and syms[-1].address < address:
              # Truncate the partial symbol and leave it without |full_name|.
              # The data from the current line will form a new symbol.
              syms[-1].size = address - syms[-1].address
              next_usable_address = address
              is_partial = False

            if is_partial:
              # First eligible Level 3 line: it names the pending Level 2
              # symbol instead of creating a new one.
              syms[-1].full_name = tok
              syms[-1].size = size if size > 0 else min(syms[-1].size, span)
              next_usable_address = address + syms[-1].size
              is_partial = False
            elif address >= next_usable_address:
              # Prefer |size|, and only fall back to |span| if |size == 0|.
              size_to_use = size if size > 0 else span
              syms.append(
                  models.Symbol(
                      cur_section, size_to_use, address=address, full_name=tok))
              # Suppress symbols with overlapping |address|. This eliminates
              # labels from assembly sources.
              next_usable_address = address + size_to_use
              if cur_obj is not None:
                syms[-1].object_path = cur_obj

        else:
          # |level| is neither 1, 2 nor 3.
          logging.error('Problem line: %r', line)

    if promoted_name_count:
      logging.info('Found %d promoted global names', promoted_name_count)
    return self._section_sizes, syms

Example #4

Show file

File: linker_map_parser.py Project: xubaodian/chromium

    def Parse(self, lines):
        """Parses the lines of a linker map file into sections and symbols.

        Walks the (line, address, size, level, span, tok) tuples produced by
        |self.Tokenize(lines)|; the span is ignored here.

        Args:
          lines: Iterable of lines, the first of which has been consumed to
            identify file type.

        Returns:
          A tuple of (section_sizes, symbols).
        """
        # Newest format:
        #     VMA      LMA     Size Align Out     In      Symbol
        #     194      194       13     1 .interp
        #     194      194       13     1         <internal>:(.interp)
        #     1a8      1a8     22d8     4 .ARM.exidx
        #     1b0      1b0        8     4         obj/sandbox/syscall.o:(.ARM.exidx)
        #     400      400   123400    64 .text
        #     600      600       14     4         obj/...:(.text.OUTLINED_FUNCTION_0)
        #     600      600        0     1                 $x.3
        #     600      600       14     1                 OUTLINED_FUNCTION_0
        #  123800   123800    20000   256 .rodata
        #  123800   123800       4      4         ...:o:(.rodata._ZN3fooE.llvm.1234)
        #  123800   123800       4      1                 foo (.llvm.1234)
        #  123804   123804       4      4         ...:o:(.rodata.bar.llvm.1234)
        #  123804   123804       4      1                 bar.llvm.1234
        # Older format:
        # Address          Size             Align Out     In      Symbol
        # 00000000002002a8 000000000000001c     1 .interp
        # 00000000002002a8 000000000000001c     1         <internal>:(.interp)
        # ...
        # 0000000000201000 0000000000000202    16 .text
        # 0000000000201000 000000000000002a     1         /[...]/crt1.o:(.text)
        # 0000000000201000 0000000000000000     0                 _start
        # 000000000020102a 0000000000000000     1         /[...]/crti.o:(.text)
        # 0000000000201030 00000000000000bd    16         /[...]/crtbegin.o:(.text)
        # 0000000000201030 0000000000000000     0                 deregister_tm_clones
        # 0000000000201060 0000000000000000     0                 register_tm_clones
        # 00000000002010a0 0000000000000000     0                 __do_global_dtors_aux
        # 00000000002010c0 0000000000000000     0                 frame_dummy
        # 00000000002010ed 0000000000000071     1         a.o:(.text)
        # 00000000002010ed 0000000000000071     0                 main
        symbols = []
        section = None
        section_is_useful = None
        num_promoted = 0

        # TODO(huangs): Use |span| from |tokenizer| to fix http://crbug.com/892648.
        for line, address, size, level, _, tok in self.Tokenize(lines):
            # Level 1 ("Out" column): a section or a PROVIDE_HIDDEN line.
            if level == 1:
                if not tok.startswith('PROVIDE_HIDDEN'):
                    self._section_sizes[tok] = size
                section = tok
                # Offset used later to turn "(.text._name)" into "_name".
                name_offset = len(section) + 2
                section_is_useful = (
                    section in (models.SECTION_BSS, models.SECTION_RODATA,
                                models.SECTION_TEXT)
                    or section.startswith(models.SECTION_DATA))
                continue

            # Everything nested under an uninteresting section is dropped.
            if not section_is_useful:
                continue

            # Level 2 ("In" column): an object path plus the section name
            # within that object, or '<internal>:...'.
            if level == 2:
                # This symbol may be renamed through symbols[-1] by Level 3.
                sym = models.Symbol(section, size, address=address)
                symbols.append(sym)
                # E.g. path.o:(.text._name)
                obj_path, paren_value = tok.split(':')
                # '(.text._name)' -> '_name'.
                mangled_name = paren_value[name_offset:-1]
                if obj_path == '<internal>':
                    # As of 2017/11 LLD does not distinguish merged strings
                    # from other merged data. Feature request is filed under:
                    # https://bugs.llvm.org/show_bug.cgi?id=35248
                    # So every unnamed <internal> entry in .rodata is treated
                    # as string literals, although some may hold numeric
                    # constants or other data. Other entries look like
                    # <internal>:(.text.thunk).
                    if section == '.rodata' and mangled_name == '':
                        sym.full_name = '** lld merge strings'
                    else:
                        sym.full_name = '** ' + mangled_name
                elif obj_path != 'lto.tmp' and 'thinlto-cache' not in obj_path:
                    sym.object_path = obj_path
                continue

            if level != 3:
                logging.error('Problem line: %r', line)
                continue

            # Level 3 ("Symbol" column): symbol names or special names such as
            # '.L_MergeGlobals'. Annotations such as '$d', '$t.42' are consumed
            # by the tokenizer, so they do not appear here.
            # Ignore anything with '.L_MergedGlobals' prefix. This seems to
            # only happen for ARM (32-bit) builds.
            if tok.startswith('.L_MergedGlobals'):
                continue
            # Multiple Level 3 entries may exist; only the first one with
            # |size != 0| names the symbol.
            # TODO(huangs): Process all entries to fix http://crbug.com/892648.
            if not size or symbols[-1].full_name:
                continue

            # Outlined functions have names like OUTLINED_FUNCTION_0, which
            # can appear 1000+ times and cause false aliasing. They are
            # collapsed into the placeholder name '** outlined function'.
            name = tok
            if name.startswith('OUTLINED_FUNCTION_'):
                name = '** outlined function'
            stripped = demangle.StripLlvmPromotedGlobalNames(name)
            if len(stripped) != len(name):
                num_promoted += 1
                name = stripped
            symbols[-1].full_name = name

        if num_promoted:
            logging.info('Found %d promoted global names', num_promoted)
        return self._section_sizes, symbols