def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and returns a dict of address->[names].

  Only nm entries whose type character is 't', 'T' or 'W', whose name is
  accepted by _IsRelevantNmName(), and whose address is non-zero are kept.

  Args:
    elf_path: Path of the ELF file that is passed to nm.
    tool_prefix: Passed to path_util.GetNmPath() to locate nm, and to
        demangle.DemangleSetsInDicts().

  Returns:
    A dict mapping each address (int) to a sorted list of the names found at
    that address. Addresses that have only one name are omitted.

  Raises:
    subprocess.CalledProcessError: If nm exits with a non-zero status.
  """
  # Constructors often show up twice, so use sets to ensure no duplicates.
  names_by_address = collections.defaultdict(set)

  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  args = [path_util.GetNmPath(tool_prefix), '--no-sort', '--defined-only',
          elf_path]
  output = subprocess.check_output(args)
  # check_output() returns bytes on Python 3, where looking for a str inside
  # bytes raises TypeError. Decode once so the parsing below works on text.
  # On Python 2 the output already is a str and is left untouched.
  if not isinstance(output, str):
    output = output.decode('utf-8')

  for line in output.splitlines():
    # Each line is parsed as "<hex address> <type char> <mangled name>".
    space_idx = line.find(' ')
    address_str = line[:space_idx]
    section = line[space_idx + 1]
    mangled_name = line[space_idx + 3:]

    # To verify that rodata does not have aliases:
    #   nm --no-sort --defined-only libchrome.so > nm.out
    #   grep -v '\$' nm.out | grep ' r ' | sort | cut -d' ' -f1 > addrs
    #   wc -l < addrs; uniq < addrs | wc -l
    if section not in 'tTW' or not _IsRelevantNmName(mangled_name):
      continue

    address = int(address_str, 16)
    # Symbols at address 0 are ignored.
    if not address:
      continue
    names_by_address[address].add(mangled_name)

  # Demangle all names.
  names_by_address = demangle.DemangleSetsInDicts(names_by_address, tool_prefix)

  # Since this is run in a separate process, minimize data passing by returning
  # only aliased symbols.
  # Also: Sort to ensure stable ordering.
  # items() (rather than the Python 2-only iteritems()) works on both Python 2
  # and Python 3.
  return {k: sorted(v) for k, v in names_by_address.items() if len(v) > 1}
def CollectAliasesByAddress(elf_path, tool_prefix):
  """Runs nm on |elf_path| and returns a dict of address->[names].

  Only nm entries whose type character is 't', 'T' or 'W', whose name is
  accepted by _IsRelevantNmName(), and whose address is non-zero are kept.
  All OUTLINED_FUNCTION_* entries at one address are merged into a single
  synthetic name: '** outlined function', with ' * <count>' appended when
  there is more than one such entry.

  Args:
    elf_path: Path of the ELF file that is passed to nm.
    tool_prefix: Passed to path_util.GetNmPath() to locate nm, and to
        demangle.DemangleSetsInDicts().

  Returns:
    A dict mapping each address (int) to a list of the names found at that
    address, sorted alphabetically with names starting with '**' placed last.
    An address is included only if it has more than one name, or more than
    one OUTLINED_FUNCTION_* entry.

  Raises:
    subprocess.CalledProcessError: If nm exits with a non-zero status.
  """
  # Constructors often show up twice, so use sets to ensure no duplicates.
  names_by_address = collections.defaultdict(set)

  # Many OUTLINED_FUNCTION_* entries can coexist on a single address, possibly
  # mixed with regular symbols. However, naively keeping these is bad because:
  # * OUTLINED_FUNCTION_* can have many duplicates. Keeping them would cause
  #   false associations downstream, when looking up object_paths from names.
  # * For addresses with multiple OUTLINED_FUNCTION_* entries, we can't get the
  #   associated object_path (exception: the one entry in the .map file, for LLD
  #   without ThinLTO). So keeping copies around is rather useless.
  # Our solution is to merge OUTLINED_FUNCTION_* entries at the same address
  # into a single symbol. We'd also like to keep track of the number of copies
  # (although it will not be used for PSS computation). This is done by
  # writing the count in the name, e.g., '** outlined function * 5'.
  num_outlined_functions_at_address = collections.Counter()

  # About 60mb of output, but piping takes ~30s, and loading it into RAM
  # directly takes 3s.
  args = [
      path_util.GetNmPath(tool_prefix), '--no-sort', '--defined-only', elf_path
  ]
  # pylint: disable=unexpected-keyword-arg
  proc = subprocess.Popen(
      args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8')
  # llvm-nm may write to stderr. It is discarded on success to denoise, and
  # only surfaced (via the exception) when nm fails.
  stdout, stderr = proc.communicate()
  # Raise explicitly instead of using assert: asserts are stripped under
  # "python -O", which would let the output of a failed nm run be parsed as if
  # it were complete.
  if proc.returncode != 0:
    raise subprocess.CalledProcessError(
        proc.returncode, args, output=stdout, stderr=stderr)

  for line in stdout.splitlines():
    # Each line is parsed as "<hex address> <type char> <mangled name>".
    space_idx = line.find(' ')
    address_str = line[:space_idx]
    section = line[space_idx + 1]
    mangled_name = line[space_idx + 3:]

    # To verify that rodata does not have aliases:
    #   nm --no-sort --defined-only libchrome.so > nm.out
    #   grep -v '\$' nm.out | grep ' r ' | sort | cut -d' ' -f1 > addrs
    #   wc -l < addrs; uniq < addrs | wc -l
    if section not in 'tTW' or not _IsRelevantNmName(mangled_name):
      continue

    address = int(address_str, 16)
    # Symbols at address 0 are ignored.
    if not address:
      continue
    if mangled_name.startswith('OUTLINED_FUNCTION_'):
      # Only counted here; the merged name is added after the loop.
      num_outlined_functions_at_address[address] += 1
    else:
      names_by_address[address].add(mangled_name)

  # Need to add before demangling because |names_by_address| changes type.
  for address, count in num_outlined_functions_at_address.items():
    name = '** outlined function' + (' * %d' % count if count > 1 else '')
    names_by_address[address].add(name)

  # Demangle all names.
  names_by_address = demangle.DemangleSetsInDicts(names_by_address, tool_prefix)

  # Since this is run in a separate process, minimize data passing by returning
  # only aliased symbols.
  # Also: Sort to ensure stable ordering.
  # The sort key places names starting with '**' (the merged outlined-function
  # entry) after all other names.
  return {
      addr: sorted(names, key=lambda n: (n.startswith('**'), n))
      for addr, names in names_by_address.items()
      if len(names) > 1 or num_outlined_functions_at_address.get(addr, 0) > 1
  }