def lookupAddress(self, fmt, address):
    """Find data of an appropriate format at a (possibly symbolic) address.

    fmt can influence the results:
        lookupAddress(single, address_of_v) -> v.x
        lookupAddress(Vector, address_of_v) -> v

    The address is split into at most one pointer-typed base, a constant
    offset and the remaining non-pointer terms.  When no pointer base is
    present, the constant offset is tried against the 'trigtables' binding
    (only for fmt == basicTypes.single) and then against the known globals.

    If nothing matches (no base and no enclosing global, or a base whose
    target cannot be indexed and is not addressed exactly), the result is
    the generic dereference expression buildExpr('@', fmt, address).

    Raises:
        Exception: if the sum contains more than one pointer-typed term,
            or if the constant offset is not smaller than the size of the
            indexable type the base points to.
    """
    base = None
    memOffset = 0
    others = []

    if isinstance(address, algebra.Literal):
        memOffset = address.value
    elif isinstance(address, algebra.Expression) and address.op == '+':
        memOffset = address.constant.value if address.constant else 0
        for term in address.args:
            if isinstance(term.type, basicTypes.Pointer):
                if base:
                    raise Exception('adding pointers')
                base = term
            else:
                others.append(term)
    elif isinstance(address.type, basicTypes.Pointer):
        if basicTypes.isIndexable(address.type.pointedType):
            base = address
            memOffset = 0
        else:
            # pointer to a non-indexable type: name the thing it points at
            return algebra.Symbol(
                address.type.target if address.type.target else address.name,
                address.type.pointedType)

    if not base:
        # check for trig lookup
        if (fmt == basicTypes.single
                and memOffset in self.bindings['trigtables']):
            try:
                angle = others[0].args[0].args[0]
            except (AttributeError, IndexError, TypeError):
                # The remaining terms are not nested the way this probe
                # expects; fall through to the globals lookup.  Only these
                # access errors are swallowed (previously a bare except).
                pass
            else:
                return algebra.Symbol('{}Table({})'.format(
                    self.bindings['trigtables'][memOffset], angle))

        pair = self.relativeToGlobals(memOffset)
        if pair:
            base = algebra.Symbol(
                'raw', basicTypes.Pointer(pair[0].type, pair[0].name))
            memOffset = pair[1]
        else:
            # no idea what we are looking at, process it anyway
            return buildExpr('@', fmt, address)

    if basicTypes.isIndexable(base.type.pointedType):
        if memOffset >= self.getSize(base.type.pointedType):
            raise Exception(
                'trying to look up address {:#x} in {} (only {:#x} bytes)'.format(
                    memOffset, base.type.pointedType,
                    self.getSize(base.type.pointedType)))
        if base.type.target:
            # descend from the named target instead of from the pointer
            return self.subLookup(
                fmt, algebra.Symbol(base.type.target, base.type.pointedType),
                memOffset, others)
        else:
            return self.subLookup(fmt, base, memOffset, others)
    elif base.type.target and memOffset == 0 and not others:
        return algebra.Symbol(base.type.target, base.type.pointedType)
    else:
        return buildExpr('@', fmt, address)
def subLookup(self, fmt, base, address, others=None):
    """Recursively find data at the given address from the start of a type.

    Args:
        fmt: format used for the fallback expressions built here.
        base: expression whose type (base.type) is being descended into.
        address: constant offset from the start of base.
        others: additional non-constant terms of the address.  Defaults to
            no extra terms.  The list is never modified.

    Returns:
        An index expression for arrays, a member expression for structs
        listed in self.bindings['structs'], or a generic fallback: an '@'
        dereference when extra terms remain, otherwise a synthetic member
        named '<format code>_<offset>'.
    """
    if others is None:
        others = []

    if isinstance(base.type, basicTypes.Array):
        spacing = self.getSize(base.type.pointedType)
        index = algebra.Literal(address // spacing)
        canIndex = True
        for o in others:
            # only products whose constant factor equals the element size
            # can be folded into the index
            if (isinstance(o, algebra.Expression) and o.op == '*'
                    and o.constant and o.constant.value == spacing):
                index = buildExpr(
                    '+', index,
                    algebra.Expression.arithmeticMerge('*', o.args))
            else:
                canIndex = False
                break
        if canIndex:
            element = buildExpr('[', base, index)
            if basicTypes.isIndexable(base.type.pointedType):
                # every extra term was folded into the index, so the
                # recursion continues with the in-element offset only
                return self.subLookup(fmt, element, address % spacing)
            else:
                return element
        else:
            return buildExpr(
                '@', fmt,
                algebra.Expression.arithmeticMerge(
                    '+', [base, algebra.Literal(address)] + others))

    parentStruct = None
    if isinstance(base.type, basicTypes.Pointer):
        parentStruct = base.type.pointedType
    elif isinstance(base.type, str):
        parentStruct = base.type

    if parentStruct and parentStruct in self.bindings['structs']:
        members = self.bindings['structs'][parentStruct].members
        try:
            # member with the greatest offset not exceeding the address
            bestOffset = max(x for x in members if x <= address)
        except ValueError:
            # nothing less
            pass
        else:
            newBase = buildExpr('.', base,
                                algebra.Symbol(*members[bestOffset]))
            if address < bestOffset + self.getSize(newBase.type):
                if basicTypes.isIndexable(newBase.type):
                    return self.subLookup(fmt, newBase,
                                          address - bestOffset, others)
                if not others:
                    #TODO account for reading the lower short of a word, etc.
                    return newBase

    if others:
        # Use the same Expression.arithmeticMerge as the array branch above;
        # the original called algebra.arithmeticMerge here.
        return buildExpr(
            '@', fmt,
            algebra.Expression.arithmeticMerge(
                '+', [base, algebra.Literal(address)] + others))
    else:
        return buildExpr(
            '.', base,
            algebra.Symbol(
                '{}_{:#x}'.format(basicTypes.getCode(fmt), address), fmt))
def __init__(self, bindings, args=[]):
    """Set up an empty history and seed the registers known at entry.

    bindings is stored as self.bindings.  args is an iterable of
    (register, name, format) triples; each register is recorded in
    self.argList and written as a symbol, named after the register when
    no name is given.
    """
    self.states = defaultdict(list)
    self.bindings = bindings
    self.argList = []  # arguments beyond the given ones
    self.now = Context([Branch()])

    self.write(Register.R0, algebra.Literal(0))
    seeded = (
        (Register.SP, 'SP'),
        (Register.RA, 'RA'),
        (SpecialRegister.Compare, 'bad_CC'),
    )
    for target, label in seeded:
        self.write(target, algebra.Symbol(label))

    for reg, name, fmt in args:
        shown = name or VariableHistory.getName(reg)
        self.argList.append(reg)
        self.write(reg, algebra.Symbol(shown, fmt))
def read(self, var, fmt=basicTypes.unknown):
    """Retrieve (an appropriate representation of) the value in a register and track its usage

    var should be a register or Stack() object

    Depending on the expected format, the stored value may be altered substantially

    Recorded states for var are scanned from newest to oldest.  The first
    state implied by the current context (self.now) supplies the value,
    unless a merely compatible state was seen before it; in that case (or
    when no state is implied) a fresh symbol named after var is returned.
    A var with no recorded states gets a new symbol, which is stored as
    its first state.
    """
    if var == Register.R0:  #zero is zero, shouldn't remember type info
        return algebra.Literal(0)
    if var in self.states:
        uncertain = False
        for st in reversed(self.states[var]):
            if self.now.implies(
                    st.context)[0]:  # this state definitely occurred
                if uncertain:
                    # a newer state might also apply, so the value cannot
                    # be substituted; flag this state and stop searching
                    st.explicit = True
                    break
                else:
                    # NOTE(review): the original indentation was lost; the
                    # elif below is assumed to pair with the Literal check
                    # - confirm against the upstream file
                    if isinstance(st.value, algebra.Literal):
                        # reinterpret a constant according to the
                        # requested format
                        if isinstance(fmt, basicTypes.EnumType):
                            st.value = self.getEnumValue(
                                fmt, st.value.value)
                        elif basicTypes.isIndexable(fmt):
                            st.value = self.lookupAddress(fmt, st.value)
                    elif st.value.type in [
                            basicTypes.unknown, basicTypes.bad
                    ]:
                        # value had no usable type yet: adopt the
                        # requested format
                        st.value.type = fmt
                    return st.value
            elif self.now.isCompatibleWith(st.context):
                # this state may or may not have occurred
                st.explicit = True
                uncertain = True
        return algebra.Symbol(VariableHistory.getName(var), fmt)
    else:
        # first access to var is a read: create a symbol for it
        symName = VariableHistory.getName(var)
        if VariableHistory.couldBeArg(var):
            self.argList.append(var)
            symName = 'arg_' + symName
        self.states[var].append(
            VariableState(self.getName(var), algebra.Symbol(symName, fmt),
                          self.now))
        return self.states[var][-1].value
def relativeToGlobals(self, offset):
    """Locate the global in self.bindings['globals'] that contains offset.

    Returns a (symbol, relative offset) pair for the entry with the
    greatest key not exceeding offset, provided the relative offset is
    smaller than that symbol's size.  Returns None otherwise.
    """
    table = self.bindings['globals']
    candidates = [start for start in table if start <= offset]
    if not candidates:
        # nothing less than this value
        return None

    nearest = max(candidates)
    symbol = algebra.Symbol(*table[nearest])
    delta = offset - nearest
    if delta < self.getSize(symbol.type):
        return symbol, delta
    return None
def makeSymbolic(name, mipsData, bindings, arguments=[]):
    """Produce symbolic representation of the logic of a MIPS function

    mipsData is unpacked as (address, mips, loops); only mips, the
    sequence of instructions, is used here, and name is not referenced.
    bindings and arguments are handed to VariableHistory, and
    bindings['functions'] is consulted for known call targets.

    Returns a tuple (mainCode, history, booleans): the root CodeBlock, the
    VariableHistory built while converting, and a dict mapping branch line
    numbers to the second element of the branch's conversion result.
    """
    address, mips, loops = mipsData

    baseBranch = Branch()
    currContext = Context([baseBranch])  #no branches yet
    branchList = [baseBranch]  #branches and their current lines
    updates = set()  # line numbers at which the set of active branches changes
    booleans = {}  #will hold the symbols associated with branches
    delayed = None  # branch/call/return waiting for the next instruction
    mainCode = CodeBlock(currContext)
    currBlock = mainCode

    history = VariableHistory(bindings, arguments)

    for lineNum, instr in enumerate(mips):
        if lineNum in updates:
            # different set of active branches, start a new block of code
            newContext = Context(
                [b for b in branchList if 0 <= b.line <= lineNum], lineNum)
            # walk up from the current block until an enclosing block's
            # context is implied by the new one
            newParent = currBlock
            while True:
                imp, rel = newContext.implies(newParent.context)
                if imp:
                    break
                else:
                    newParent = newParent.parent
            currBlock = CodeBlock(newContext, newParent, rel)
            # continue an elif chain, or start a new one
            if newParent.children and not rel.isCompatibleWith(
                    newParent.children[-1].relative):
                currBlock.elseRelative = rel.processElif(
                    newParent.elifAccumulator)
            else:
                newParent.elifAccumulator = [
                    list(br)[0] for br in rel.cnf if len(br) == 1
                ]
            newParent.children.append(currBlock)
            history.now = newContext
            #TODO prune now-irrelevant choices from branches so this doesn't take forever on long functions

        try:
            result = conversionList[instr.opcode](instr, history)
        except ValueError:
            # the converter rejected this instruction; keep it verbatim
            currBlock.code.append((InstrResult.unhandled, instr))
        else:
            if result[0] in [
                    InstrResult.branch, InstrResult.likely, InstrResult.jump
            ]:
                if result[1]:
                    booleans[lineNum] = result[1]
                # last element of the result is taken relative to the line
                # after the branch
                delayed = (result[0], lineNum + 1 + result[-1])
                continue
            elif result[0] in [InstrResult.function, InstrResult.end]:
                delayed = result
                continue
            elif result[0] != InstrResult.none:
                currBlock.code.append(result)

        if delayed:
            # the control transfer was issued on the previous line
            # (lineNum - 1) and is applied only now, after the following
            # instruction (presumably the MIPS delay slot) was converted
            if delayed[0] in [
                    InstrResult.branch, InstrResult.likely, InstrResult.jump
            ]:
                branchType, branchDest = delayed
                currBranches = [
                    x for x in branchList if 0 <= x.line <= lineNum - 1
                ]
                if branchType == InstrResult.jump:
                    # unconditional: every active branch moves to the target
                    for b in currBranches:
                        b.line = branchDest
                    updates.add(branchDest)
                else:
                    # conditional: retire each active branch (line = -1)
                    # and replace it by two children, one per outcome
                    for b in currBranches:
                        b.line = -1
                        branchList.append(
                            b.branchOff(lineNum - 1, True, lineNum + 1))
                        branchList.append(
                            b.branchOff(lineNum - 1, False, branchDest))
                    updates.add(lineNum + 1)
                    updates.add(branchDest)
            elif delayed[0] == InstrResult.function:
                argList = []
                funcCall = delayed[1]
                if funcCall in bindings['functions']:
                    # known function: read exactly its declared arguments
                    title = bindings['functions'][funcCall].name
                    for reg, argName, fmt in bindings['functions'][
                            funcCall].args:
                        argList.append((argName, history.read(reg, fmt)))
                        history.markBad(reg)
                else:
                    # NOTE(review): bare except also hides unrelated errors;
                    # '%06x' raises TypeError for a non-integer funcCall
                    try:
                        title = 'fn%06x' % funcCall
                    except:
                        title = funcCall
                    # unknown function: pass whichever of these registers
                    # currently hold a valid value
                    # NOTE(review): markBad is assumed to sit inside the
                    # isValid check, as in the stack loop below - confirm
                    for reg in [
                            Register.A0, FloatRegister.F12, Register.A1,
                            FloatRegister.F14, Register.A2, Register.A3
                    ]:
                        if history.isValid(reg):
                            argList.append((reg.name, history.read(reg)))
                            history.markBad(reg)
                    # then stack slots 0x10..0x24, stopping at the first
                    # slot that is not valid
                    for s in (basicTypes.Stack(i)
                              for i in range(0x10, 0x28, 4)):
                        if history.isValid(s):
                            argList.append(('stack_{:x}'.format(s.offset),
                                            history.read(s)))
                            history.markBad(s)
                        else:
                            break
                # placeholder for the call's result, named after the byte
                # offset (line * 4) of the call instruction
                marker = algebra.Symbol(
                    'returnValue_{:x}'.format((lineNum - 1) * 4),
                    basicTypes.bad)
                currBlock.code.append(
                    (InstrResult.function, title, argList, marker))
                history.write(Register.V0, marker)
                history.write(FloatRegister.F0, marker)
            elif delayed[0] == InstrResult.end:
                # prefer V0 over F0 as the returned value, None if neither
                # is valid
                if history.isValid(Register.V0):
                    returnValue = history.read(Register.V0)
                elif history.isValid(FloatRegister.F0):
                    returnValue = history.read(FloatRegister.F0)
                else:
                    returnValue = None
                currBlock.code.append((InstrResult.end, returnValue))
            delayed = None

    return mainCode, history, booleans
def getEnumValue(self, fmt, val):
    """Build the member expression '<enum>.<name>' for a numeric enum value.

    The member name is looked up in self.bindings['enums'][fmt.enum].values.
    When that lookup fails, the name '_<val in hex>' is used instead.

    Returns:
        buildExpr('.', fmt.enum, <symbol of type fmt carrying the name>).
    """
    try:
        subname = self.bindings['enums'][fmt.enum].values[val]
    except (LookupError, TypeError, AttributeError):
        # Unknown enum or value: fall back to a synthetic name.  Only
        # lookup failures are swallowed (previously a bare except).
        subname = '_{:#x}'.format(val)
    return buildExpr('.', fmt.enum, algebra.Symbol(subname, fmt))
def markBad(self, var):
    """Overwrite var with a 'bad_<name>' symbol of type basicTypes.bad."""
    label = 'bad_%s' % VariableHistory.getName(var)
    placeholder = algebra.Symbol(label, basicTypes.bad)
    self.write(var, placeholder)