def __init__(self, name, address, size, alignment, objectFile, arch):
    """Create a symbol as described by the architecture `arch`.

    Address, size, alignment, reference flag, padding and object file are
    kept per architecture, in dictionaries keyed by Arch.X86, Arch.ARM and
    Arch.POWER. Only the entry for `arch` receives the value passed in;
    the other entries keep their default (-1, False, 0 or "NULL").

    The process exits with status -1 when objectFile is rejected by
    symbolObjectFileSanityCheck.
    """
    def perArch(default):
        # Fresh dictionary holding `default` for each supported architecture
        return dict.fromkeys((Arch.X86, Arch.ARM, Arch.POWER), default)

    # Symbol name
    self._name = name

    # Address, size and alignment rule of the symbol for each architecture
    self._addresses = perArch(-1)
    self._addresses[arch] = address
    self._sizes = perArch(-1)
    self._sizes[arch] = size
    self._alignments = perArch(-1)
    self._alignments[arch] = alignment

    # Which architectures are referencing this symbol?
    self._isReferenced = perArch(False)
    self._isReferenced[arch] = True

    # Padding to add before and after the symbol in the produced linker
    # script, for each architecture
    self._paddingBefore = perArch(0)
    self._paddingAfter = perArch(0)

    # Object file the symbol initially comes from; the entry for `arch` is
    # only filled in once the object file passed the sanity check
    self._objectFiles = perArch("NULL")
    if not symbolObjectFileSanityCheck(objectFile):
        er("Failed sanity check on object file during symbol instance "
            "creation\n")
        sys.exit(-1)
    self._objectFiles[arch] = objectFile
def checkFilesExistence(fileList):
    """Check that every path in fileList refers to an existing regular file.

    fileList is an iterable of paths. On the first path for which
    os.path.isfile() is false, an error is reported through er() and the
    process exits with status -1. Returns None when all files exist.
    """
    # Imported here so the function does not depend on the builtin exit()
    # helper, which is injected by the `site` module for interactive use
    # and is not guaranteed to exist (e.g. `python -S`).
    import sys

    for f in fileList:
        if not os.path.isfile(f):
            er("File '" + f + "' does not exist/is not a file")
            sys.exit(-1)
def updateSymbolsList(self, symbolsList):
    """Merge the symbols of this architecture into symbolsList.

    symbolsList is a dictionary of lists, one per section, for example:
    symbolsList = { ".text" : [], ".rodata" : [], ".bss" : [],
                    ".data" : [], ".tdata" : [], ".tbss" : [] }

    Every symbol parsed from the map file of this architecture is looked
    up in the sections of the executable. Symbols that belong to none of
    the sections of symbolsList, or that are listed in
    Globals.SYMBOLS_BLACKLIST for their section, are ignored. When a
    symbol already in the list compares equal to the parsed one, the
    existing entry receives the address, size, alignment and object file
    of this architecture and is marked as referenced by it; otherwise the
    parsed symbol is appended to the list of its section.

    The process exits with status -1 if the matching entry is already
    referenced by this architecture.

    The way to call it is:
    Arch1.updateSymbolsList(list)
    Arch2.updateSymbolsList(list)
    Arch3.updateSymbolsList(list)
    etc.
    """
    arch = self.getArch()

    # Section information comes from the executable, restricted to the
    # sections symbolsList has an entry for
    wantedSections = symbolsList.keys()
    sectionsInfo = ReadElfParser.getSectionInfo(
        self.getExecutable(), filterSections=wantedSections)

    # Candidate symbols come from the map file
    for candidate in self.parseMapFile():
        sectionName = self.getSection(candidate, sectionsInfo)
        if not sectionName:
            # Symbol not in one of the considered sections
            continue

        # Skip blacklisted symbols
        if (sectionName in Globals.SYMBOLS_BLACKLIST and
                candidate.getName()
                in Globals.SYMBOLS_BLACKLIST[sectionName]):
            continue

        for known in symbolsList[sectionName]:
            if not candidate.compare(known):
                continue

            # Same symbol already in the list: it must not have been
            # filled in for this architecture yet
            if known.getReference(arch):
                details = "|".join([
                    known.getName(),
                    known.getObjectFile(arch),
                    hex(known.getAlignment(arch)),
                    hex(candidate.getAlignment(arch))])
                er("Already referenced updated symbol: " + details +
                    " (" + self.getArchString() + ")\n")
                sys.exit(-1)

            known.setAddress(candidate.getAddress(arch), arch)
            known.setSize(candidate.getSize(arch), arch)
            known.setAlignment(candidate.getAlignment(arch), arch)
            known.setReference(arch)
            known.setObjectFile(candidate.getObjectFile(arch), arch)
            break
        else:
            # No existing entry matched: this is a new symbol
            symbolsList[sectionName].append(candidate)
def getSectionInfo(binaryPath, filterSections=None):
    """Return the sections of an ELF binary as a list of Section objects.

    Runs `readelf -SW` on binaryPath and parses every section header line
    of its output.

    filterSections is a collection of section names to consider, ex:
    [".data", ".text", etc.]. When it is None or empty, all the sections
    are returned; otherwise only the sections whose name is in it.

    The process exits with status -1 when readelf returns a non-zero
    status; readelf's output is reported through er() first.
    """
    absolutePath = os.path.abspath(binaryPath)
    cmd = ["readelf", "-SW", absolutePath]

    # One section header line of `readelf -SW`. Raw strings keep the
    # backslashes for the regex engine instead of relying on Python leaving
    # unknown string escapes untouched.
    readelfRe = re.compile(
        r"^[\s]*\[([\s0-9]+)\]\s([.\S]*)?\s+([.\S]+)\s+([0-9a-f]+)"
        r"\s+([0-9a-f]+)\s+([0-9a-f]+)\s+([0-9a-f]+)\s+([.\S]*)\s+([0-9a-f]+)"
        r"\s+([0-9a-f]+)\s+([0-9a-f]+)$")

    try:
        # universal_newlines makes check_output return text rather than
        # bytes, so the output can be split and matched as strings
        readelf_output = subprocess.check_output(
            cmd, stderr=subprocess.STDOUT, universal_newlines=True)
    except subprocess.CalledProcessError as e:
        er("executing readelf " + absolutePath + " :\n")
        er(e.output)
        # Non-zero status: a bare sys.exit() would report success
        sys.exit(-1)

    res = []
    for line in readelf_output.splitlines():
        matchRes = readelfRe.match(line)
        if not matchRes:
            continue

        # Which fields are hexadecimal and which are decimal follows the
        # readelf sources (per the original author)
        s = Section(int(matchRes.group(1)),      # index (dec)
            matchRes.group(2),                   # name
            matchRes.group(3),                   # type
            int(matchRes.group(4), 16),          # address (hex)
            int(matchRes.group(5), 16),          # offset (hex)
            int(matchRes.group(6), 16),          # size (hex)
            int(matchRes.group(7), 16),          # ES (hex)
            matchRes.group(8),                   # flags
            int(matchRes.group(9)),              # Lk
            int(matchRes.group(10)),             # Inf
            int(matchRes.group(11)))             # alignment

        if filterSections and (s.getName() not in filterSections):
            continue
        res.append(s)

    return res
def parseMapFile(self):
    """Return the list of Symbol instances described in the map file.

    The path of the map file is given by self.getMapFile(); the file
    should have been generated by gold.ld -Map <file>.

    The symbols described in the map file can mostly have 2 forms:
    - 1 line description:
        "<name> <addr> <size> <alignment> <object file>"
    - 2 lines description:
        "<name>
         <addr> <size> <alignment> <object file>"
    One regexp handles the single line form and two handle the double
    lines one. This filters out a lot of unneeded lines, but the result
    may still contain symbols from sections the caller does not care
    about.

    The process exits with status -1 when a line looks like the first
    half of a 2 lines description but is not followed by a matching
    second line (including when it is the last line of the file).
    """
    filePath = self.getMapFile()
    res = []

    twoLinesRe1 = re.compile(r"^[\s]+(\.[texrodalcbs\.]+[\S]*)$")
    twoLinesRe2 = re.compile(
        r"^[\s]+(0x[0-9a-f]+)[\s]+(0x[0-9a-f]+)[\s]+"
        r"(0x[0-9a-f]+)[\s]+(.*)$")
    oneLineRe = re.compile(
        r"^[\s]+(\.[texrodalcbs\.]+[\S]+)[\s]+(0x[0-9a-f]+)[\s]+"
        r"(0x[0-9a-f]+)[\s]+(0x[0-9a-f]+)[\s]+(.*)$")

    with open(filePath, "r") as mapfile:
        lines = mapfile.readlines()

    for index, line in enumerate(lines):
        firstHalf = twoLinesRe1.match(line)
        if firstHalf:
            # Probably a 2 lines symbol description. There is no next line
            # after the last one: use an empty string so that case is
            # reported below instead of raising IndexError.
            nextLine = lines[index + 1] if index + 1 < len(lines) else ""
            secondHalf = twoLinesRe2.match(nextLine)
            if not secondHalf:
                er("missed a two lines symbol while parsing mapfile:\n")
                er("line1: " + line + "\n")
                er("line2: " + nextLine + "\n")
                sys.exit(-1)
            name = firstHalf.group(1)
            fields = secondHalf.groups()
        else:
            singleLine = oneLineRe.match(line)
            if not singleLine:
                # Not a symbol description
                continue
            name = singleLine.group(1)
            fields = singleLine.groups()[1:]

        # fields is (address, size, alignment, object file); the numbers
        # are written with a 0x prefix, hence base 0
        address = int(fields[0], 0)
        size = int(fields[1], 0)
        alignment = int(fields[2], 0)
        objectFile = fields[3]
        res.append(Symbol.Symbol(name, address, size, alignment,
            objectFile, self.getArch()))

    return res
def parseAndCheckArgs(parser):
    """Parse command line arguments and perform some sanity checks.

    parser must provide parse_args(); the returned object is expected to
    have the attributes x86_bin, x86_map, arm_bin, arm_map, ppc_bin and
    ppc_map.

    At least two of the binaries must be provided, each of them with its
    map file, and all of these files must exist (checked with
    checkFilesExistence). The process exits with status -1 otherwise.

    Returns the parsed arguments.
    """
    args = parser.parse_args()

    x86 = (args.x86_bin, args.x86_map)
    arm = (args.arm_bin, args.arm_map)
    ppc = (args.ppc_bin, args.ppc_map)

    if args.x86_bin and args.arm_bin and args.ppc_bin:
        # Files are checked in the order arm, ppc, x86 when all three
        # architectures are given; kept so that the first missing file
        # reported stays the same as before
        selected = [arm, ppc, x86]
    else:
        selected = [pair for pair in (x86, arm, ppc) if pair[0]]

    if len(selected) < 2:
        er("Please provide at least 2 of --x86-bin/--arm-bin/--ppc-bin\n")
        sys.exit(-1)

    # Every provided binary needs its map file
    if not all(mapFile for _, mapFile in selected):
        er("Mapfile parameter missing for some/all archs\n")
        sys.exit(-1)

    filesToCheck = [path for pair in selected for path in pair]
    checkFilesExistence(filesToCheck)

    return args
def setObjectFile(self, obj, arch):
    """Record obj as the object file of this symbol for architecture arch.

    The process exits with status -1 when obj is rejected by
    symbolObjectFileSanityCheck; the stored value is left untouched in
    that case.
    """
    if symbolObjectFileSanityCheck(obj):
        self._objectFiles[arch] = obj
        return

    er("Failed sanity check on object file during symbol update\n")
    sys.exit(-1)
def compare(self, anotherSymbol):
    """Tell whether this symbol and anotherSymbol are the same symbol.

    They are the same if the name is the same AND if they correspond to
    the same original object file. Object files are compared by base name
    (the part after the last "/"), for every pair made of an object file
    of this symbol and an object file of anotherSymbol where neither is
    "NULL". User object files built from the same source for different
    architectures carry different suffixes ("_x86_64.o", "_aarch64.o" or
    plain ".o", "_powerpc64le.o"); such a pair is considered the same
    file.

    Returns False as soon as the names differ or one pair of object files
    differs, True otherwise. The process exits with status -1 when there
    was no pair of object files to compare.
    """
    def stripSuffix(fileName, suffix):
        # Remove suffix only when it terminates fileName. str.replace()
        # would also remove occurrences in the middle of the name
        # (e.g. the ".o" of "my.obj.o").
        if fileName.endswith(suffix):
            return fileName[:-len(suffix)]
        return fileName

    # For an object file ending with the first item, the suffixes the same
    # file may carry on another architecture. Only the first entry matching
    # the file name is used, hence ".o" comes last.
    # FIXME this is a hardcoded convention for the user object files
    # created by the popcorn compiler, a better way to handle it is needed
    counterpartSuffixes = (
        ("_x86_64.o", (".o", "_aarch64.o", "_powerpc64le.o")),
        ("_aarch64.o", ("_x86_64.o", "_powerpc64le.o")),
        ("_powerpc64le.o", ("_x86_64.o", ".o", "_aarch64.o")),
        (".o", ("_x86_64.o", "_powerpc64le.o")),
    )

    # Quick path: first check the name
    if self.getName() != anotherSymbol.getName():
        return False

    # Then check the object paths
    res = None
    otherObjs = [anotherSymbol.getObjectFile(Arch.X86),
        anotherSymbol.getObjectFile(Arch.ARM),
        anotherSymbol.getObjectFile(Arch.POWER)]

    for objf1 in self._objectFiles.values():
        for objf2 in otherObjs:
            if objf1 == "NULL" or objf2 == "NULL":
                continue

            cmpstr1 = objf1.split("/")[-1]
            cmpstr2 = objf2.split("/")[-1]

            # Special case of user object files that differ by name
            # because they are for different archs but are the result of
            # the compilation of the same user source file
            sameSource = False
            for suffix, others in counterpartSuffixes:
                if cmpstr1.endswith(suffix):
                    base = stripSuffix(cmpstr1, suffix)
                    sameSource = any(
                        base == stripSuffix(cmpstr2, other)
                        for other in others)
                    break
            if sameSource:
                res = True
                continue

            # General case: the base names must be identical
            if cmpstr1 != cmpstr2:
                return False
            res = True

    if res is None:
        er("Could not find object files to compare...\n")
        sys.exit(-1)

    return res