def demangle(name):
    """Translate a mangled C++ symbol into its verbose, human-readable form.

    The name is normalized first; if cxxfilt is unavailable or rejects the
    name, the normalized name is returned unchanged and the failure is only
    logged at debug level.
    """
    normalized = normName(name)
    try:
        import cxxfilt
        return cxxfilt.demangle(normalized)
    except Exception as exc:
        logger.debug('failed to demangle name (%r): %r', normalized, exc)
        return normalized
def demangle_symbol(name):
    """Demangle a versioned ELF symbol (e.g. ``_ZN3fooEv@@GLIBC_2.2.5``).

    The default-version marker ``@@`` is collapsed to ``@``, the version
    suffix (if any) is split off, the bare symbol is demangled, and the
    version suffix is re-attached.

    Returns the demangled ``name[@version]``; the mangled name is kept
    when demangling fails.
    """
    sym_name = name.replace('@@', '@')
    sym_ver = ''
    if '@' in sym_name:
        # BUG FIX: split only on the first '@' -- symbols whose version
        # string itself contains '@' previously raised ValueError here.
        sym_name, sym_ver = sym_name.split('@', 1)
    try:
        sym_name = cxxfilt.demangle(sym_name)
    except Exception:
        # Best effort: keep the mangled name when cxxfilt rejects it.
        pass
    if sym_ver != '':
        sym_name = sym_name + '@' + sym_ver
    return sym_name
def insertDestructor(func):
    """Register an implicitly-called destructor as a single-block function.

    Destructors reached only implicitly have no recorded CFG, so we
    synthesize one basic block spanning the destructor's class declaration
    and mark it as covered.
    """
    if func in func2entry:
        return  # already registered
    func2spell[func] = cxxfilt.demangle(func)
    destructorClass = getDestructorClass(func)
    func2loc[func] = destructorClass.attrib['file']
    numBlocks[func] = 1
    currBlock = func + "#0"
    func2entry[func], func2exit[func] = currBlock, currBlock
    block2coverage[currBlock] = True
    blockGraph[currBlock] = []
    # BUG FIX: the block range must span start..end of the class
    # declaration; the original concatenated 'range.start' twice, which
    # made the block a zero-length range for containment checks.
    block2range[currBlock] = range2List(destructorClass.attrib['range.start']) + \
        range2List(destructorClass.attrib['range.end'])
    # Random 16-digit ID so synthesized destructor records can be mapped
    # back to their function name.
    desFunc2ID[func] = str(random.randint(1000000000000000, 9999999999999999))
    desID2Func[desFunc2ID[func]] = func
def _parse(self, f, options=None):
    """Parse an ELF binary and extract architecture and symbol information.

    The uploaded file object ``f`` is copied to a temp path (pyelftools and
    inspectelf need a real file on disk), scanned, and the temp file is
    removed afterwards.

    :param f: readable binary file-like object with the ELF contents
    :param options: unused; default changed from a mutable ``{}`` to None
    :return: list of records created via ``self.createData``
    """
    # BUG FIX: Python-2 print statement replaced with the function form.
    print("ELF Parsing")
    information = []
    tmpname = "/tmp/%s" % os.path.basename(self.filename)
    with open(tmpname, "wb") as tmp_f:
        tmp_f.write(f.read())
    # BUG FIX: the original opened the temp file twice and leaked the
    # handle passed to ELFFile; a single managed handle is used instead.
    with open(tmpname, "rb") as elf_f:
        e = ELFFile(elf_f)
        # Record the architecture (strip the 'EM_' prefix from e_machine).
        information.append(
            self.createData("main", "ELF", ELF_ARCH=e.header.e_machine[3:]))
        for s in e.iter_sections():
            if s['sh_type'] != 'SHT_STRTAB':
                continue
            # String tables hold NUL-separated symbol names (bytes).
            for raw in s.data().split(b"\x00"):
                if not raw:
                    continue
                x = raw.decode('utf-8', errors='replace')
                try:
                    information.append(
                        self.createData("main", "ELF",
                                        ELF_FUNCTION=demangle(x)))
                except Exception:
                    # Keep the mangled name when demangling fails.
                    information.append(
                        self.createData("main", "ELF", ELF_FUNCTION=x))
    flags = inspectelf.inspect(tmpname, recursive=False, cfg=True, force=True)
    if flags is not None:
        information.append(
            self.createData(
                "main", "ELF", **{
                    'ELF_' + k.upper(): v
                    for k, v in flags[tmpname].items()
                }))
    os.unlink(tmpname)
    return information
def kernel_times(self):
    """
    Select all CUDA kernel call times: (start_nsec, end_nsec)
    :return:
    """
    c = self.conn.cursor()
    # The times are in nanoseconds; lets convert them to microseconds as our base unit
    # NOTE(review): rows are mutated by key below (r['name'] = ...), so the
    # connection must use a dict-like row factory -- confirm where
    # self.conn is created.
    c.execute(textwrap.dedent("""
        SELECT A.start AS start_nsec, A.end AS end_nsec, S.value as name
        FROM CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL AS A
        join StringTable AS S
        ON A.name == S._id_
        ORDER BY A.start
    """))
    kernel_results = c.fetchall()
    # Kernel names are stored mangled in the nvprof StringTable; demangle
    # them for readability before aggregating.
    for r in kernel_results:
        r['name'] = cxxfilt.demangle(r['name'])
    for r in kernel_results:
        self.kernel_stats.add(
            r['name'],
            start_end_nsec_to_usec(r['start_nsec'], r['end_nsec']),
            nsec_to_usec(r['start_nsec']),
            nsec_to_usec(r['end_nsec']))

    def fetch_memcpy(table):
        # Same query shape as above, but memcpy tables carry a copyKind
        # enum in place of a kernel name.
        c.execute(textwrap.dedent("""
            SELECT A.start AS start_nsec, A.end AS end_nsec, copyKind as name
            FROM {table} AS A
            ORDER BY A.start
        """.format(table=table)))
        results = c.fetchall()
        return results

    memcpy_results = fetch_memcpy('CUPTI_ACTIVITY_KIND_MEMCPY') + fetch_memcpy('CUPTI_ACTIVITY_KIND_MEMCPY2')
    # Map the numeric copyKind to a human-readable memcpy label (fall back
    # to the raw value for unknown kinds).
    for r in memcpy_results:
        r['name'] = COPYKIND_TO_MEMCPY.get(r['name'], r['name'])
        self.kernel_stats.add(
            r['name'],
            start_end_nsec_to_usec(r['start_nsec'], r['end_nsec']),
            nsec_to_usec(r['start_nsec']),
            nsec_to_usec(r['end_nsec']))
def getCurrentBlockFromTrace(record, lastBlockVisited):
    # Map a PIN trace record to the basic block that produced it.
    # With no caller info, fall back to the callee's entry block.
    if 'CALLERNODEID' not in record:
        return func2entry[record['CALLEENAME']]
    func = stree.find('.//*[@id="'+record['CALLERNODEID']+'"]')
    funcName = func.attrib['linkage_name']
    # INSLOC is "file:line[:...]"; PIN only gives us the line number.
    possibleCalls, reqLineNumber = [], int(record['INSLOC'].split(':')[1])
    possibleBlocks, lastCol = [], None
    # Find possible function calls that might match our requirement
    if 'CALLEENODEID' not in record:
        # No callee node id: match CallExprs by unqualified spelling.
        justName = cxxfilt.demangle(record['CALLEENAME']).split('::')[-1]
        for callee in func.findall('.//CallExpr[@spelling="'+justName+'"]'):
            if range2List(callee.attrib['range.start'])[0] == reqLineNumber or\
               range2List(callee.attrib['range.end'])[0] == reqLineNumber:
                possibleCalls.append(range2List(callee.attrib['range.end']))
    else:
        # Callee node id known: match CallExprs by definition id.
        for callee in func.findall('.//CallExpr[@def_id="'+record['CALLEENODEID']+'"]'):
            if range2List(callee.attrib['range.start'])[0] == reqLineNumber or\
               range2List(callee.attrib['range.end'])[0] == reqLineNumber:
                possibleCalls.append(range2List(callee.attrib['range.end']))
    # Find the basicBlocks that cover any of these functions
    for i in range(numBlocks[funcName]):
        currBlock = funcName + "#" + str(i)
        tmp = [pos for pos in possibleCalls if isContained(block2range[currBlock], pos)]
        if len(tmp) > 0:
            possibleBlocks.append(currBlock)
    if len(possibleBlocks) == 0:
        return func2entry[funcName]
    # NOTE(review): '>= 1' returns for ANY non-empty list, which makes
    # everything below unreachable; this was presumably meant to be
    # '== 1'. Confirm intent before changing.
    if len(possibleBlocks) >= 1:
        return possibleBlocks[0]
    # Different ways of filtering has to be applied here
    # NOTE(review): this tail is dead code as written and looks unfinished:
    #   - 'firstFilter' is undefined (NameError if ever reached; probably
    #     meant 'filtered');
    #   - 'lastCol' is never set to True, so the tuple-returning branch
    #     (inconsistent with the str returns above) cannot trigger;
    #   - 'blockGraph[tmp]' indexes with a LIST, but blockGraph keys are
    #     block-name strings (see insertDestructor) -- would raise TypeError.
    tmp = lastBlockVisited[:]
    while len(tmp) == 1:
        filtered = [currBlock for currBlock in possibleBlocks if currBlock in tmp]
        if len(firstFilter) == 1:
            if lastCol == True:
                ls = [pos for pos in possibleCalls if isContained(block2range[tmp[0]], pos)]
                ls = sorted(ls, key = cmp_to_key(locComapre))
                return filtered[0], ls[0]
            else:
                return filtered[0]
        tmp = blockGraph[tmp]
    return possibleBlocks[0]
def getDestructorClass(func):
    """Locate the AST node of the class (or struct) a destructor belongs to.

    The mangled destructor name is demangled and split on '::'; every
    component before the class name is treated as a (possibly nested)
    namespace to descend into.  Returns the ClassDecl node when present,
    otherwise the StructDecl lookup result (which may be None).
    """
    parts = cxxfilt.demangle(func).split('::')
    scope = stree
    # Walk down through any nested namespaces preceding the class name.
    for namespace in parts[:-2]:
        scope = scope.find('.//Namespace[@spelling="' + namespace + '"]')
    className = parts[-2]
    # Prefer a class declaration; fall back to a struct declaration.
    found = scope.find('.//ClassDecl[@spelling="' + className + '"]')
    if found is None:
        found = scope.find('.//StructDecl[@spelling="' + className + '"]')
    return found
def isUnhandledDestructor(record):
    """Return True when the record is a destructor call that has no
    explicit CallExpr on the same source line.

    PIN only reports line numbers (no columns), so a destructor sharing a
    line with an explicit call to the same class is assumed to be handled.
    """
    if not isDestructor(record['CALLEENAME']):
        return False
    if 'CALLERNODEID' not in record:
        return False
    # BUG FIX: compare the line number as an int -- range2List yields
    # ints, so the original str value could never match and every
    # destructor was reported as unhandled.
    reqLineNumber = int(record['INSLOC'].split(':')[1])
    func = stree.find('.//*[@id="'+record['CALLERNODEID']+'"]')
    justName = cxxfilt.demangle(record['CALLEENAME']).split('::')[-1]
    for callee in func.findall('.//CallExpr[@spelling="'+justName+'"]'):
        if range2List(callee.attrib['range.start'])[0] == reqLineNumber or \
           range2List(callee.attrib['range.end'])[0] == reqLineNumber:
            # It is not likely to be an implicitly called destructor since
            # it is in the same line as that of a constructor call to the same
            # class. Here we are again limited by PIN's column API
            return False
    return True
def __init__(self, name: str, address: int, symbol_type: str,
             demangled_name: Optional[str] = None):
    """Symbol record.

    :param name: raw (possibly mangled, possibly '@'-versioned) symbol name
    :param address: symbol address
    :param symbol_type: nm-style symbol type character
    :param demangled_name: pre-computed demangled name; when None, the
        name (minus any '@version' suffix) is demangled via cxxfilt, and
        demangled_name stays None if that fails.
    """
    self.name = name
    self.address = address
    self.symbol_type = symbol_type
    if demangled_name is None:
        try:
            # Strip the ELF version suffix before demangling.
            self.demangled_name = cxxfilt.demangle(name.split('@')[0])
        except Exception:
            # Narrowed from a bare 'except:' so KeyboardInterrupt /
            # SystemExit are no longer swallowed; keep None on failure.
            self.demangled_name = demangled_name
    else:
        self.demangled_name = demangled_name
def parse_syms(infile):
    """Take the output of the `nm` command, and parse it into a
    tuple representing the symbols in the text segment of the
    binary. Returns a list of (address, symbol_name)."""
    from cxxfilt import demangle
    syms = []
    # BUG FIX: read from the passed-in file object; the original ignored
    # `infile` entirely and always consumed sys.stdin.
    for line in infile:
        addr, t, mangled = line.split()
        # Keep only text/weak symbols (nm types t/T/v/V/w/W).
        if t not in "tTvVwW":
            continue
        addr = int(addr, base=16)
        name = demangle(mangled)
        syms.append((addr, name))
    return sorted(syms)
def dump_table(name: str) -> None:
    """Print the vtable stored in ELF symbol *name*, one entry per line.

    Each 8-byte little-endian word is resolved against the symbol table:
    entries found in decomp_symbols print green, other known symbols blue,
    and words with the top bit set start a new (secondary) vtable at the
    negated offset.  Fails via utils.fail on a missing/empty symbol.
    """
    try:
        symbols = util.elf.build_addr_to_symbol_table(util.elf.my_symtab)
        decomp_symbols = {
            fn.decomp_name
            for fn in utils.get_functions() if fn.decomp_name
        }

        offset, size = util.elf.get_symbol_file_offset_and_size(
            util.elf.my_elf, util.elf.my_symtab, name)
        util.elf.my_elf.stream.seek(offset)
        vtable_bytes = util.elf.my_elf.stream.read(size)

        if not vtable_bytes:
            utils.fail(
                "empty vtable; has the key function been implemented? (https://lld.llvm.org/missingkeyfunction.html)"
            )

        print(
            f"{Fore.WHITE}{Style.BRIGHT}{cxxfilt.demangle(name)}{Style.RESET_ALL}"
        )
        print(f"{Fore.YELLOW}{Style.BRIGHT}vtable @ 0x0{Style.RESET_ALL}")

        # Vtables are arrays of 64-bit words.
        assert size % 8 == 0
        for i in range(size // 8):
            word: int = struct.unpack_from("<Q", vtable_bytes, 8 * i)[0]
            # NOTE: 'name' is rebound here -- from this point on it is the
            # per-entry symbol name, not the function argument.
            name = symbols.get(word, None)
            if word == 0:
                # Null entry: nothing to print.
                pass
            elif name is not None:
                demangled_name: str = cxxfilt.demangle(name)
                color = Fore.GREEN if name in decomp_symbols else Fore.BLUE
                print(f"{color}{bold(demangled_name)}{Style.RESET_ALL}")
                print(f" {name}")
            elif word & (1 << 63):
                # Negative 64-bit value: offset marking the start of a
                # secondary vtable; reread the word as signed and negate.
                offset = -struct.unpack_from("<q", vtable_bytes, 8 * i)[0]
                print()
                print(
                    f"{Fore.YELLOW}{Style.BRIGHT}vtable @ {offset:#x}{Style.RESET_ALL}"
                )
            else:
                print(f"{Fore.RED}unknown data: {word:016x}{Style.RESET_ALL}")
    except KeyError:
        utils.fail("could not find symbol")
def output_to_database(symbol_info):
    """Insert one potential dependency-bug record into the database.

    ``symbol_info`` is a 12-element list; the trailing two elements
    (lib_symver, lib_data_type) are folded into a single Symbol column:
    the demangled versioned symbol for direct symbols ('None' data type),
    or the data type for indirect ones.  Duplicate rows are skipped.
    """
    [
        pkg_name, pkg_version, pkg_dep_name, pkg_dep_version, lib_name,
        lib_object, lib_version_old, lib_version_new, lib_direct, lib_symver,
        lib_severity, lib_data_type
    ] = symbol_info
    symbol_info = [pkg_name, pkg_version, pkg_dep_name, pkg_dep_version,
                   lib_name, lib_object, lib_version_old, lib_version_new,
                   lib_direct, lib_severity]
    # demangle lib_symver (same scheme as demangle_symbol)
    sym_name = lib_symver
    sym_ver = ''
    if '@' in sym_name:
        # BUG FIX: split only on the first '@' -- version strings that
        # contain '@' previously raised ValueError here.
        sym_name, sym_ver = sym_name.split('@', 1)
    try:
        sym_name = cxxfilt.demangle(sym_name)
    except Exception:
        # Keep the mangled name when cxxfilt rejects it.
        pass
    if sym_ver != '':
        sym_name = sym_name + '@' + sym_ver
    lib_symver = sym_name
    # direct symbol -> store the symbol itself; indirect -> the data type
    if lib_data_type == 'None':
        symbol_info.append(lib_symver)
    else:
        symbol_info.append(lib_data_type)
    # Skip rows that were already written (simplified from the original
    # empty-list dance, which was equivalent but harder to read).
    if str(symbol_info) in has_output_datatype:
        return
    has_output_datatype.add(str(symbol_info))
    # insert symbol_info into database
    stmt = 'insert into potential_depbug (PkgName, PkgVer, Depname, DepVer, \
        LibName, LibObject, PreVer, PostVer, Direction, Severity, Symbol) \
        values (?,?,?,?,?,?,?,?,?,?,?)'
    conn.execute(stmt, symbol_info)
def scorep_top1_name(obj):
    """Report the demangled name of the top-1 function in the Score-P
    score report.  For instance::

        _ZN6sphexa3sph31computeMomentumAndEnergyIADImplIdNS_13...

    becomes::

        void sphexa::sph::computeMomentumAndEnergyIADImpl<...>(...)

    Only the part before the first template bracket is kept.
    """
    regex = r'^\s{9}(USR|COM).*\s+(?P<pct>\S+)\s+\S+\s+(?P<fn>_\w+)'
    report_path = os.path.join(obj.stagedir, obj.rpt_score)
    mangled = sn.evaluate(sn.extractsingle(regex, report_path, 'fn'))
    demangled = cxxfilt.demangle(mangled)
    short_name = demangled.split('<')[0]
    return '% (' + short_name + ')'
def add(self, state):
    """Record one angr state's history into the transition graph.

    Consecutive history descriptions become edges (hit counts kept in
    ``self.trace``); each unseen description becomes a node, colored by
    where its basic-block address lives (loader start / main binary /
    error or exception path).
    """
    prev = ""
    for x in state.history.descriptions:
        self.debug.append(x)
        ### register relation
        if not prev == "":
            k = (prev, x)
            if k in self.trace:
                self.trace[k] += 1
            else:
                self.trace[k] = 1
        prev = str(x)
        ### create node if necessary
        if self.get_node_name_by_description(x) == self.NO_SUCH_NODE:
            pc, err = self.get_pc_from_description(x)
            if err:
                self.create_node(label=x, description=x)
            else:
                self.transition_addrs.append(pc)
                describe_addr = self.proj.loader.describe_addr(pc)
                if "(offset" in describe_addr:
                    # Demangle the leading function name and record the
                    # in-function offset.
                    func = describe_addr.split()[0]
                    func = cxxfilt.demangle(func)
                    offset = describe_addr.split()[2].replace(')', '')
                    describe_addr = ' '.join([func] + describe_addr.split()[1:])
                    self.transition_offsets.append(offset)
                fillcolor = "transparent"
                if describe_addr.startswith("_start"):
                    fillcolor = "gray"
                elif "in main binary" in describe_addr:
                    fillcolor = "lightskyblue"
                elif "_error(" in describe_addr or "_exception" in describe_addr:
                    # unwanted result?
                    fillcolor = "salmon"
                additional_line = ""
                # BUG FIX: 'self.disasm is not {}' was always True (identity
                # comparison against a fresh dict literal); test truthiness.
                if self.disasm:
                    if pc in self.disasm:
                        additional_line = "\\n" + self.disasm[pc]
                self.create_node(label=x + "\\n" + describe_addr + additional_line,
                                 description=x, fillcolor=fillcolor)
def demangle(src_path, dst_path):
    """Demangle a symbol table dumped with ``nm -S libxxx.so``.

    :param src_path: path to the symbol-table file produced by ``nm -S``
    :param dst_path: path where the demangled names are written, one per line
    :raises FileNotFoundError: when src_path does not exist
    """
    # BUG FIX: the original called os.path.isfile() and discarded the
    # result; fail loudly on a missing input instead.
    if not os.path.isfile(src_path):
        raise FileNotFoundError(src_path)
    with open(src_path, "rb") as f:
        lines = f.readlines()
    names = []
    for line in lines:
        # Each `nm -S` row is: address size type name
        parts = str(line, 'utf-8').rstrip("\r\n").split(' ')
        if len(parts) == 4:
            names.append(cxxfilt.demangle(parts[3]))
    # BUG FIX: write to the dst_path argument; the original opened a file
    # literally named "dst_path" in the working directory.
    with open(dst_path, "wb") as f:
        for name in names:
            f.write(bytes(name, 'utf-8'))
            f.write(bytes('\n', 'utf-8'))
def generate_ai_loadparam_body(info: list) -> str:
    """Generate the C++ body of an AI class's loadParams_() from parameter
    metadata: one getter call per named parameter, a base-class
    loadParams_() invocation for recognized mangled calls, and a FIXME
    placeholder for anything else."""
    getter_for_type = {
        "dynamic_param": "getDynamicParam",
        "dynamic2_param": "getDynamicParam2",
        "static_param": "getStaticParam",
        "map_unit_param": "getMapUnitParam",
        "aitree_variable": "getAITreeVariable",
    }
    lines = []
    for entry in info:
        kind = entry["type"]
        if kind in getter_for_type:
            # Entries without a parameter name are silently skipped.
            if entry["param_name"]:
                lines.append(
                    f'{getter_for_type[kind]}(&{get_member_name(entry)}, "{entry["param_name"]}");'
                )
        elif kind == "call":
            fn_name: str = entry["fn"]
            if fn_name.startswith("_ZN") and fn_name.endswith("11loadParams_Ev"):
                # Mangled call to a parent class's loadParams_(); emit the
                # explicit base-class invocation instead.
                parent_class_name = cxxfilt.demangle(fn_name).split("::")[-2]
                lines.append(f"{parent_class_name}::loadParams_();")
            else:
                lines.append(f"// FIXME: CALL {fn_name} @ {entry['addr']:#x}")
        else:
            raise AssertionError(f"unknown type: {kind}")
    return "\n".join(lines)
def parse_mangled_name(name):
    """Take a potentially mangled symbol name and demangle it to its name,
    removing the trailing hash.  Raises cxxfilt.InvalidName if the input
    is not a mangled symbol.

    Rust trait-impl names produced by Tock Components look like
    ``_$LT$capsules..ieee802154..driver..RadioDriver$u20$as$u20$...$GT$7receive``
    i.e. "<Struct as Trait>::method".  Only the structure name is kept, so
    all trait-method implementations are grouped under their structure.
    """
    demangled = cxxfilt.demangle(name, external_only=False)
    stripped = trim_hash_from_symbol(demangled)
    if stripped.startswith("_$LT$"):
        # Drop the "_$LT$" prefix, then cut at the next '$' to isolate
        # e.g. capsules..ieee802154..driver..RadioDriver
        stripped = stripped[5:]
        dollar = stripped.find("$")
        if dollar > 0:
            stripped = stripped[:dollar]
    return stripped
def parse_used_labels(lines: List[str]) -> Set[str]:
    """Collect the set of labels referenced by the given assembly lines.

    Mangled function labels (lines starting with '_') are demangled;
    blank lines, comments, and assembler directives are ignored.
    """
    label_pattern = re.compile(r'\.[A-Za-z0-9_.]+')
    used = set()
    for line in lines:
        # Skip blanks, comments, and directives (".x" or "\t.x").
        if not line or line[0] in '#.' or line.startswith('\t.'):
            continue
        if line[0] == '_':
            # Mangled function label, e.g. "_Z3foov:".
            mangled = line.split(':')[0]
            used.add(cxxfilt.demangle(mangled))
            continue
        match = label_pattern.search(line)
        if match is not None:
            used.add(match.group())
    return used
def map_assembly_lines(lines: List[str], location_marker='\t.loc') -> \
        List[Tuple[str, int]]:
    """Pair each assembly line with the source line number it came from.

    ``.loc`` directives update the current source line; directive and
    label lines map to 0, and a blank line resets the mapping.
    """
    result = []
    source_line = 0
    for line in lines:
        if line == '':
            # Blank line ends the current source mapping.
            source_line = 0
            continue
        if line[0] == '.':
            result.append((line, 0))
            continue
        if line.startswith('\t.'):
            result.append((line, 0))
            if line.startswith(location_marker):
                tokens = line.replace('\t', ' ').split(' ')
                # print(tokens)
                # ".loc <file> <line> ..." -- tokens[3] is the line number
                # (tokens[0] is '' from the leading tab).
                source_line = int(tokens[3])
            # NOTE(review): the original flattened layout is ambiguous here;
            # this continue is assumed to cover EVERY '\t.' directive --
            # otherwise non-loc directives would fall through to
            # trim_comment and be appended a second time. Confirm.
            continue
        if line.startswith('#') or line[:-1].isnumeric():
            continue
        if line[0] == '_':
            # Mangled function label: emit it demangled and reset mapping.
            func_name = line.split(':')[0]
            demangled_name = cxxfilt.demangle(func_name)
            result.append((demangled_name + ':', 0))
            source_line = 0
            continue
        line = trim_comment(line)
        if line == '':
            continue
        result.append((line, source_line))
    return result
def initialize_binary(self, path):
    """Register a binary's symbols for tracing.

    Runs ``nm`` on the binary, demangles each symbol where possible, and
    stores an initial (untraced, no-parameters) state per function under
    self._setup[path].

    Raises BinaryAlreadyAddedError when the path was already registered
    and BinaryNotExistsError when ``nm`` fails on it.
    """
    if path in self._setup:
        raise BinaryAlreadyAddedError('Binary at {} already added'.format(path))
    try:
        nm_lines = check_output(['nm', path]).decode().rstrip().split('\n')
    except CalledProcessError:
        raise BinaryNotExistsError
    init_state = {}
    # The symbol name is the last whitespace-separated field of each row.
    for row in nm_lines:
        mangled = row.split()[-1]
        try:
            display_name = cxxfilt.demangle(mangled)
        except cxxfilt.InvalidName:
            display_name = mangled
        init_state[display_name] = {
            'mangled': mangled,
            'traced': False,
            'parameters': {}
        }
    self._setup[path] = init_state
def are_demangled_names_equal(name1: str, name2: str):
    """True when both mangled names demangle to the same symbol."""
    demangled1 = cxxfilt.demangle(name1)
    demangled2 = cxxfilt.demangle(name2)
    return demangled1 == demangled2
def on_message(message, data):
    """Frida message callback: print the raw payload alongside its
    demangled form ("<raw> - <demangled>")."""
    payload = message["payload"]
    print(payload + " - " + cxxfilt.demangle(payload))
def demangle(name):
    """Return the demangled form of a mangled C++ symbol name."""
    readable = cxxfilt.demangle(name)
    return readable
def demangle(uc_name):
    """Demangle a symbol name that carries a one-character prefix.

    Raw addresses (strings starting with "0x") are returned untouched;
    otherwise the leading prefix character is stripped before demangling.
    """
    if uc_name.startswith("0x"):
        return uc_name
    return cxxfilt.demangle(uc_name[1:])
def convert_preplf_to_rel(preplfPath, outRelPath):
    """Convert a PREPLF (preprocessed ELF) file into a GameCube/Wii REL
    relocatable module, resolving external symbols against the DOL via
    the module-level ``dolFile``.

    Returns True on success; False when any DOL symbol could not be
    resolved (in that case no output file is written).
    """
    preplf = PreplfFile(preplfPath)
    rel = OutputStream()

    # Initial header info
    rel.write_long(2)  # Module ID. Hardcoding 2 here because 0 is the dol and the game already has a module using 1 (NESemu.rel). Should be more robust ideally
    rel.write_long(0)  # Next module link - always 0
    rel.write_long(0)  # Prev module link - always 0
    rel.write_long(len(preplf.sections))  # Num sections
    rel.write_long(0)  # Section info offset filler
    rel.write_long(0)  # Module name offset (our rel won't include the module name so this is staying null)
    rel.write_long(0)  # Module name size
    rel.write_long(2)  # Module version

    # Fetch data needed for the rest of the header
    bssSec = preplf.section_by_name(".bss")
    assert (bssSec != None)

    # Each entry point may exist under its plain or old-GCC-mangled name.
    prologSymbol = preplf.symbol_by_name("_prolog")
    if prologSymbol is None:
        prologSymbol = preplf.symbol_by_name("_prolog__Fv")

    epilogSymbol = preplf.symbol_by_name("_epilog")
    if epilogSymbol is None:
        epilogSymbol = preplf.symbol_by_name("_epilog__Fv")

    unresolvedSymbol = preplf.symbol_by_name("_unresolved")
    if unresolvedSymbol is None:
        unresolvedSymbol = preplf.symbol_by_name("_unresolved__Fv")

    # Remaining header data
    rel.write_long(bssSec.size)
    rel.write_long(0)  # Relocation table offset filler
    rel.write_long(0)  # Imports offset filler
    rel.write_long(0)  # Imports size filler
    rel.write_byte(prologSymbol['sectionIndex'] if prologSymbol is not None else 0)
    rel.write_byte(epilogSymbol['sectionIndex'] if epilogSymbol is not None else 0)
    rel.write_byte(unresolvedSymbol['sectionIndex'] if unresolvedSymbol is not None else 0)
    rel.write_byte(0)  # Padding
    rel.write_long(prologSymbol['value'] if prologSymbol is not None else 0)
    rel.write_long(epilogSymbol['value'] if epilogSymbol is not None else 0)
    rel.write_long(unresolvedSymbol['value'] if unresolvedSymbol is not None else 0)
    rel.write_long(8)  # Module alignment
    rel.write_long(8)  # BSS alignment

    # Section info filler
    sectionInfoOffset = rel.tell()

    for section in preplf.sections:
        rel.write_long(0)
        rel.write_long(0)

    # Write sections
    sectionInfoList = []
    wroteBss = False

    for section in preplf.sections:
        # Sections not in the to-keep list should be nulled out
        info = {}
        info['exec'] = (section.flags & 0x4) != 0

        secStart = rel.tell()
        name = section.name
        isBss = name == ".bss"

        keepSections = [
            ".text", ".rodata", ".ctors", ".dtors", ".data", ".init",
            ".rela.init", ".rela.text", ".rela.fini", ".rela.rodata",
            ".rela.eh_frame", ".rela.data", ".fini",
            ".eh_frame"  # Not actually used?
        ]
        shouldKeep = name in keepSections

        if not shouldKeep:
            # Also keep numbered variants, e.g. ".text1".
            for sec in keepSections:
                if name.startswith(sec):
                    shouldKeep = True
                    break

        if shouldKeep is True:
            info['offset'] = secStart
            rel.write_bytes(section.data)
            rel.write_to_boundary(4)
            info['size'] = rel.tell() - secStart
        elif isBss is True:
            # .bss occupies no file space; only its size is recorded.
            info['offset'] = 0
            info['size'] = section.size
            wroteBss = True
        elif name == ".group":
            info['offset'] = 0
            info['size'] = 0
        # elif ".rela" in name:
        #     info['offset'] = 0
        #     info['size'] = 0
        else:
            assert (not name or wroteBss is True), name
            info['offset'] = 0
            info['size'] = 0

        sectionInfoList.append(info)

    # Generate imports and write imports section filler
    imports = []
    moduleImports = {'moduleID': 2, 'relocsOffset': 0}
    dolImports = {'moduleID': 0, 'relocsOffset': 0}
    imports.append(moduleImports)
    imports.append(dolImports)

    importsOffset = rel.tell()

    for importIdx in range(0, len(imports)):
        rel.write_long(0)
        rel.write_long(0)

    importsSize = rel.tell() - importsOffset

    # Write relocations
    relocsOffset = rel.tell()
    relocWriteSuccess = True
    unresolvedSymbolCount = 0

    # Pass 0 writes self-relative relocations (module 2), pass 1 writes
    # DOL relocations (module 0).
    for importIdx in range(0, 2):
        imports[importIdx]['relocsOffset'] = rel.tell()
        isDol = imports[importIdx]['moduleID'] == 0

        for section in preplf.sections:
            if section.type != EST_RELA or len(section.relocs) == 0:
                continue

            symbolSection = section.link
            targetSection = section.targetSecIdx

            # Make sure we only write relocations for sections that were written to the file
            sectionInfo = sectionInfoList[targetSection]
            if sectionInfo['offset'] == 0:
                continue

            curOffset = 0
            wroteSectionCommand = False

            # Parse relocations
            for reloc in sorted(section.relocs, key=lambda x: x['offset']):
                # for reloc in section.relocs:
                symbol = preplf.fetch_symbol(symbolSection, reloc['symbolIdx'])
                assert (symbol != None)

                # DOL relocations have a section index of 0; internal relocations have a valid section index
                if (symbol['sectionIndex'] == 0) != isDol:
                    continue

                # This is a valid relocation, so we have at least one - write the "change section" directive
                if not wroteSectionCommand:
                    rel.write_short(0)
                    rel.write_byte(R_DOLPHIN_SECTION)
                    rel.write_byte(targetSection)
                    rel.write_long(0)
                    wroteSectionCommand = True

                offset = reloc['offset'] - curOffset

                # Add "nop" directives to make sure the offset will fit
                # NOTE/TODO - not sure if this is actually supposed to be a signed offset - that needs to be verified
                while offset > 0xFFFF:
                    rel.write_short(0xFFFF)
                    rel.write_byte(R_DOLPHIN_NOP)
                    rel.write_byte(0)
                    rel.write_long(0)
                    offset -= 0xFFFF
                    curOffset += 0xFFFF

                # Write relocation
                rel.write_short(offset)

                relocType = reloc['relocType']
                if relocType == 26:
                    rel.write_byte(11)  # I patched rel14 to be rel32
                    # rel.write_byte(26)
                else:
                    rel.write_byte(relocType)

                # Internal relocs are easy - just copy data from the ELF reloc/symbol
                if not isDol:
                    rel.write_byte(symbol['sectionIndex'])
                    rel.write_long(symbol['value'] + reloc['addend'])  # this is basically just the section-relative offset to the symbol

                # DOL relocs will require looking up the address of the symbol in the DOL
                else:
                    symbolName = symbol['name']
                    demangled = cxxfilt.demangle(symbolName)
                    remangled = demangled

                    # C++ symbols must be re-mangled into the old GCC
                    # scheme the DOL symbol map uses.
                    if ('(' in demangled and ')' in demangled) or '::' in demangled or 'operator' in demangled:
                        remangled = mangle(demangled)

                    # NOTE(review): dolFile is a module-level global -- it
                    # must be initialized before this function runs.
                    dolSymbolAddr = dolFile.get_symbol(remangled)

                    if dolSymbolAddr is None:
                        unresolvedSymbolCount += 1
                        print("Error: Failed to locate dol symbol: %s / %s (GCC: %s)" % (remangled, demangled, symbolName))
                        rel.write_byte(0)
                        rel.write_long(0)
                        relocWriteSuccess = False
                        continue

                    rel.write_byte(0)
                    rel.write_long(dolSymbolAddr)

                curOffset += offset

    if unresolvedSymbolCount > 0:
        print("Failed to find %s symbols" % unresolvedSymbolCount)

    # Write "end" directive
    rel.write_short(0)
    rel.write_byte(R_DOLPHIN_END)
    rel.write_byte(0)
    rel.write_long(0)

    # Quit out?
    if not relocWriteSuccess:
        return False

    # Write filler values from the header
    rel.goto(0x10)
    rel.write_long(sectionInfoOffset)

    rel.goto(0x24)
    rel.write_long(relocsOffset)
    rel.write_long(importsOffset)
    rel.write_long(importsSize)

    rel.goto(sectionInfoOffset)

    for section in sectionInfoList:
        # Toggle 0x1 bit on the section offset for sections containing executable code
        offset = section['offset']
        if section['exec'] is True:
            offset |= 0x1
        rel.write_long(offset)
        rel.write_long(section['size'])

    rel.goto(importsOffset)

    for imp in imports:
        rel.write_long(imp['moduleID'])
        rel.write_long(imp['relocsOffset'])

    # Done
    rel.save_file(outRelPath)
    print("Saved REL to %s" % outRelPath)
    return True
def demangle(symbol: str):
    """Best-effort demangle: return the demangled symbol, or the input
    unchanged when it is not a valid mangled name."""
    try:
        return cxxfilt.demangle(symbol, external_only=False)
    except cxxfilt.InvalidName:
        return symbol
def demangle(name):
    """Demangle a C++ symbol name and return the readable form."""
    readable = cxxfilt.demangle(name)
    return readable
def sofa_hsg(cfg, swarm_groups, swarm_stats, t_offset, cpu_mhz_xp, cpu_mhz_fp):
    """ hierarchical swarm generation

    Reads perf.script samples, demangles their C++ symbol names, derives
    per-10ms feature counts, then hierarchically k-means-clusters the
    samples into "swarms" appended to swarm_groups / swarm_stats (which
    are also returned, sorted by total duration).
    """
    with open(cfg.logdir + 'perf.script') as f, warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        samples = f.readlines()
        print_info(cfg, "Length of cpu_traces for HSG = %d" % len(samples))
        if len(samples) > 0:
            # Parse all samples in parallel.
            with mp.Pool() as pool:
                res = pool.map(
                    partial(cpu_trace_read_hsg,
                            t_offset=t_offset,
                            cfg=cfg,
                            cpu_mhz_xp=cpu_mhz_xp,
                            cpu_mhz_fp=cpu_mhz_fp), samples)
            cpu_traces = pd.DataFrame(res)
            sofa_fieldnames_ext = sofa_fieldnames + [
                "feature_types", "mem_addr"
            ]  # mem_addr for swarm-diff
            cpu_traces.columns = sofa_fieldnames_ext
            cpu_traces.to_csv(cfg.logdir + 'hsg_trace.csv',
                              mode='w',
                              header=True,
                              index=False,
                              float_format='%.6f')
            res_viz = list_downsample(res, cfg.plot_ratio)
            swarm_cpu_traces_viz = pd.DataFrame(res_viz)
            swarm_cpu_traces_viz.columns = sofa_fieldnames_ext

            char1 = ']'
            char2 = '+'
            # demangle c++ symbol, little dirty work here...
            # The raw name looks like "...] SYMBOL+0x..."; slice between
            # ']' and '+', strip any '@version' suffix, then demangle.
            swarm_cpu_traces_viz['name'] = swarm_cpu_traces_viz['name'].apply(
                lambda x: cxxfilt.demangle(
                    str(x[x.find(char1) + 1:x.find(char2)].split('@')[0])))

            ### N features ###
            ## In order to merge, give unique id of each data within 10 msec by time quotient
            swarm_cpu_traces_viz[
                'quotient'] = swarm_cpu_traces_viz['timestamp'].apply(
                    lambda x: int(x * 1000 // 10))  # //: quotient

            # count feature_types in each 10 msec groups, and create a dictionary for mapping
            df2s = {}
            for quotient, dataframe in swarm_cpu_traces_viz.groupby(
                    ['quotient', 'event']):
                # api value_counts(): return pandas series
                df2s[quotient] = dataframe.feature_types.value_counts()
            df2 = pd.DataFrame.from_dict(
                df2s, orient='index').fillna(0).astype(np.int64)
            df = swarm_cpu_traces_viz.copy()
            swarm_cpu_traces_viz = pd.merge(df, df2,
                                            left_on=['quotient', 'event'],
                                            right_index=True).copy()

        ### swarm seperation by memory location
        #swarm_groups = []
        feature_list = ['event']
        if cfg.hsg_multifeatures:
            # Extend the feature list with the perf events actually used.
            with open(cfg.logdir + 'perf_events_used.txt', 'r') as f:
                lines = f.readlines()
                feature_list.extend(lines[0].split(','))
            try:
                feature_list.remove('cycles')
                feature_list.remove('event')
            except:
                pass
        print_info(cfg, 'HSG features: ' + ','.join(feature_list))

        idx = 0
        showing_idx = 0

        # NOTE(review): cpu_traces / swarm_cpu_traces_viz are only bound
        # when samples was non-empty above, so this raises NameError on an
        # empty perf.script -- confirm the intended nesting.
        if len(cpu_traces) > 0:
            # get memory index by cheange float to integer
            swarm_cpu_traces_viz[
                'event_int'] = swarm_cpu_traces_viz.event.apply(
                    lambda x: int(x))  # add new column 'event_int'
            # swarm seperate
            event_groups = swarm_cpu_traces_viz.groupby('event_int')
            #swarm_stats = []

            # add different swarm groups
            for mem_index, l1_group in event_groups:
                # kmeans
                X = pd.DataFrame(l1_group['event'])
                num_of_cluster = 2
                y_pred = kmeans_cluster(num_of_cluster, X)

                # add new column
                # TODO: Eliminate warning of SettingWithCopyWarning
                l1_group['cluster'] = y_pred
                #for i in range(len(y_pred)):
                #    group.loc[i, 'cluster'] = y_pred[i]

                # group by new column
                clusters = l1_group.groupby('cluster')
                for l2_group_idx, l2_group in clusters:
                    # group by process id
                    #pid_clusters = cluster.groupby('pid')
                    X = pd.DataFrame(l2_group['event'])
                    num_of_cluster = 4
                    y_pred = kmeans_cluster(num_of_cluster, X)

                    # add new column
                    l2_group['cluster'] = y_pred
                    #for i in range(len(y_pred)):
                    #    l2_group.loc[i, 'cluster'] = y_pred[i]

                    # group by new column
                    l3_groups = l2_group.groupby('cluster')
                    for l3_group_idx, l3_group in l3_groups:
                        # kmeans
                        X = pd.DataFrame(l3_group['event'])
                        num_of_cluster = 4
                        y_pred_pid_cluster = kmeans_cluster(num_of_cluster, X)

                        # add new column
                        l3_group['cluster_in_pid'] = y_pred_pid_cluster

                        # group by new column
                        cluster_in_pid_clusters = l3_group.groupby(
                            'cluster_in_pid')
                        for mini_cluster_id, cluster_in_pid_cluster in cluster_in_pid_clusters:
                            # duration time
                            total_duration = cluster_in_pid_cluster.duration.sum()
                            mean_duration = cluster_in_pid_cluster.duration.mean()
                            count = len(cluster_in_pid_cluster)
                            # swarm diff
                            # caption: assign mode of function name
                            mode = str(
                                cluster_in_pid_cluster['name'].mode()[0]
                            )  # api pd.Series.mode() returns a pandas series
                            mode = mode.replace('::', '@')  # str.replace(old, new[, max])
                            # print('mode of this cluster: {}'.format(str(mode[:35])))  # uncomment this line of code when you need to check the mode of cluster

                            swarm_stats.append({
                                'keyword':
                                'SWARM_' + '["' + str(mode[:35]) + ']' + ('_' * showing_idx),
                                'duration_sum': total_duration,
                                'duration_mean': mean_duration,
                                'example':
                                cluster_in_pid_cluster.head(1)['name'].to_string().split(' ')[2],
                                'count': count
                            })

                            swarm_groups.append({
                                'group':
                                cluster_in_pid_cluster.drop(columns=[
                                    'event_int', 'cluster', 'cluster_in_pid'
                                ]),  # data of each group
                                'color': random_generate_color(),
                                'keyword':
                                'SWARM_' + '[' + str(mode[:35]) + ']' + ('_' * showing_idx),
                                'total_duration': total_duration
                            })
                            idx += 1

        swarm_groups.sort(key=itemgetter('total_duration'), reverse=True)  # reverse = True: descending
        swarm_stats.sort(key=itemgetter('duration_sum'), reverse=True)
        print_title('HSG Statistics - Top-%d Swarms' % (cfg.num_swarms))
        print('%45s\t%13s\t%30s' % ('SwarmCaption', 'ExecutionTime[sum,mean,count] (s)', 'Example'))
        for i in range(len(swarm_stats)):
            if i >= cfg.num_swarms:
                break
            else:
                swarm = swarm_stats[i]
                print('%45s\t%.6lf, %.6lf, %6d\t%45s' %
                      (swarm['keyword'], swarm['duration_sum'] / 4.0,
                       swarm['duration_mean'] / 4.0, swarm['count'],
                       swarm['example']))
        return swarm_groups, swarm_stats
def do_diff(basedump: str, mydump: str) -> List[OutputLine]:
    """Diff two disassembly dumps and return colorized OutputLines.

    Instructions are aligned by mnemonic; matched rows are classified as
    identical, imm-only ('i'), stack-only ('s') or register ('r')
    differences, unmatched rows as insertions/deletions.  Source lines
    attached to the right-hand dump are interleaved into the output.
    """
    output: List[OutputLine] = []

    lines1 = process(basedump.split("\n"))
    lines2 = process(mydump.split("\n"))

    # Independent colorers per column/purpose so equivalent symbols get
    # stable, matching colors across the two dumps.
    sc1 = SymbolColorer(0)
    sc2 = SymbolColorer(0)
    sc3 = SymbolColorer(4)
    sc4 = SymbolColorer(4)
    sc5 = SymbolColorer(0)
    sc6 = SymbolColorer(0)
    bts1: Set[str] = set()
    bts2: Set[str] = set()

    if args.show_branches:
        # Pre-color every branch target so incoming-arrow markers match.
        for (lines, btset, sc) in [
            (lines1, bts1, sc5),
            (lines2, bts2, sc6),
        ]:
            for line in lines:
                bt = line.branch_target
                if bt is not None:
                    btset.add(bt + ":")
                    sc.color_symbol(bt + ":")

    for (tag, i1, i2, j1, j2) in diff_sequences(
        [line.mnemonic for line in lines1],
        [line.mnemonic for line in lines2]
    ):
        for line1, line2 in itertools.zip_longest(lines1[i1:i2], lines2[j1:j2]):
            # zip_longest pads the shorter side with None; reclassify a
            # padded "replace" as an insert/delete.
            if tag == "replace":
                if line1 is None:
                    tag = "insert"
                elif line2 is None:
                    tag = "delete"
            elif tag == "insert":
                assert line1 is None
            elif tag == "delete":
                assert line2 is None

            line_color1 = line_color2 = sym_color = Fore.RESET
            line_prefix = " "
            if line1 and line2 and line1.diff_row == line2.diff_row:
                if maybe_normalize_large_imms(
                        line1.original) == maybe_normalize_large_imms(
                            line2.original):
                    # Effectively identical rows.
                    out1 = line1.original
                    out2 = line2.original
                elif line1.diff_row == "<delay-slot>":
                    out1 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line1.original}"
                    out2 = f"{Style.BRIGHT}{Fore.LIGHTBLACK_EX}{line2.original}"
                else:
                    mnemonic = line1.original.split()[0]
                    out1, out2 = line1.original, line2.original
                    branch1 = branch2 = ""
                    if mnemonic in instructions_with_address_immediates:
                        out1, branch1 = split_off_branch(line1.original)
                        out2, branch2 = split_off_branch(line2.original)
                    branchless1 = out1
                    branchless2 = out2
                    out1, out2 = color_imms(out1, out2)

                    # Branches to the same relative offset are considered
                    # equal even when the absolute targets differ.
                    same_relative_target = False
                    if line1.branch_target is not None and line2.branch_target is not None:
                        relative_target1 = eval_line_num(
                            line1.branch_target) - eval_line_num(line1.line_num)
                        relative_target2 = eval_line_num(
                            line2.branch_target) - eval_line_num(line2.line_num)
                        same_relative_target = relative_target1 == relative_target2

                    if not same_relative_target:
                        branch1, branch2 = color_branch_imms(branch1, branch2)

                    out1 += branch1
                    out2 += branch2
                    if normalize_imms(branchless1) == normalize_imms(branchless2):
                        if not same_relative_target:
                            # only imms differences
                            sym_color = Fore.LIGHTBLUE_EX
                            line_prefix = "i"
                    else:
                        out1 = re.sub(
                            re_sprel,
                            lambda s: sc3.color_symbol(s.group()),
                            out1,
                        )
                        out2 = re.sub(
                            re_sprel,
                            lambda s: sc4.color_symbol(s.group()),
                            out2,
                        )
                        if normalize_stack(branchless1) == normalize_stack(branchless2):
                            # only stack differences (luckily stack and imm
                            # differences can't be combined in MIPS, so we
                            # don't have to think about that case)
                            sym_color = Fore.YELLOW
                            line_prefix = "s"
                        else:
                            # regs differences and maybe imms as well
                            out1 = re.sub(
                                re_reg, lambda s: sc1.color_symbol(s.group()), out1)
                            out2 = re.sub(
                                re_reg, lambda s: sc2.color_symbol(s.group()), out2)
                            line_color1 = line_color2 = sym_color = Fore.YELLOW
                            line_prefix = "r"
            elif line1 and line2:
                # Same alignment slot but different mnemonics.
                line_prefix = "|"
                line_color1 = Fore.LIGHTBLUE_EX
                line_color2 = Fore.LIGHTBLUE_EX
                sym_color = Fore.LIGHTBLUE_EX
                out1 = line1.original
                out2 = line2.original
            elif line1:
                # Present only in the base dump.
                line_prefix = "<"
                line_color1 = sym_color = Fore.RED
                out1 = line1.original
                out2 = ""
            elif line2:
                # Present only in the new dump.
                line_prefix = ">"
                line_color2 = sym_color = Fore.GREEN
                out1 = ""
                out2 = line2.original

            if args.source and line2 and line2.comment:
                out2 += f" {line2.comment}"

            def format_part(out: str, line: Optional[Line], line_color: str,
                            btset: Set[str], sc: SymbolColorer) -> Optional[str]:
                # Render one column: line number, branch arrows, padded text.
                if line is None:
                    return None
                in_arrow = " "
                out_arrow = ""
                if args.show_branches:
                    if line.line_num in btset:
                        in_arrow = sc.color_symbol(line.line_num, "~>") + line_color
                    if line.branch_target is not None:
                        out_arrow = " " + sc.color_symbol(line.branch_target + ":", "~>")
                out = pad_mnemonic(out)
                return f"{line_color}{line.line_num} {in_arrow} {out}{Style.RESET_ALL}{out_arrow}"

            part1 = format_part(out1, line1, line_color1, bts1, sc5)
            part2 = format_part(out2, line2, line_color2, bts2, sc6)
            key2 = out2 or ""

            mid = f"{sym_color}{line_prefix}"

            if line2:
                # Interleave the source lines attached to the new dump.
                for source_line in line2.source_lines:
                    color = Style.DIM
                    # File names and function names
                    if source_line and source_line[0] != "|":
                        color += Style.BRIGHT
                        # Function names
                        if source_line.endswith("():"):
                            # Underline. Colorama does not provide this feature, unfortunately.
                            color += "\u001b[4m"
                            try:
                                source_line = cxxfilt.demangle(
                                    source_line[:-3], external_only=False)
                            except:
                                pass
                    output.append(
                        OutputLine(None,
                                   f" {color}{source_line}{Style.RESET_ALL}",
                                   source_line))

            fmt2 = mid + " " + (part2 or "")

            output.append(OutputLine(part1, fmt2, key2))

    return output
def isDestructor(func):
    """True when the mangled name demangles to a destructor, i.e. the
    final scope component begins with '~'."""
    last_component = cxxfilt.demangle(func).split('::')[-1]
    return last_component[0] == '~'