def analyse(self):
    """Run the value analysis on ``self.filecontainer``.

    Builds the WPDS for the file, computes post* from the ``f_main``
    entry key and derives the calling contexts from the result.

    The post* automaton is stored in ``self.answer``, the calling
    contexts in ``self.calling_contexts``, and ``self.analysed`` is set
    to True at the end. Progress messages are written to stdout.
    """
    container = self.filecontainer

    # Functions that have no instructions yet are parsed and linked
    # before the WPDS is built.
    for function in container.getFunctions():
        if function.getInstructions() == []:
            function.parse()
            function.link()
    wpds = construct_wpds(container)
    print("Value Analysis: WPDS constructed")

    # Query automaton with the single transition p --f_main--> accept.
    # XXX configurable entrypoint
    start = wali.WFA()
    state = wali.getKey("p")
    final = wali.getKey("accept")
    entry = wali.getKey("f_main")
    start.addTrans(state, entry, final, getNoEffect())
    start.set_initial_state(state)
    start.add_final_state(final)

    self.answer = wali.WFA()
    wpds.poststar(start, self.answer)
    print("Value Analysis: post* calculated")

    # Post-process the result. calculate_memaccesses is not run here.
    self.calling_contexts = calculate_calling_contexts(container,
                                                       self.answer)
    print("Value Analysis: calling contexts calculated")

    self.analysed = True
def calculate_memaccesses(f, calling_contexts, answer):
    """Calculates the memory address accessed by each memory operation.

    Functions whose label starts with '_' and the functions
    'call_gmon_start' and 'frame_dummy' are skipped. Of the remaining
    functions only store, load, push, pop and load-rotate instructions
    (as classified by the architecture of ``f``) are examined.

    @param f file container providing getArch() and getFunctions()
    @param calling_contexts dict from function label -> calling context
    @param answer automaton in which the per-instruction transitions
           are looked up
    @returns a dict from memory address of instruction -> memory address,
    or -> dict of loop bounds -> memory address, if function uses
    loop bounds. An instruction of a function without loop bounds that
    has no transition in ``answer`` gets no entry.
    """
    arch = f.getArch()
    memory_ins_types = ('INSTR_STORE', 'INSTR_LOAD', 'INSTR_PUSH',
                        'INSTR_POP', 'INSTR_LOADROTATE')

    #the state of our automaton
    p = wali.getKey("p")

    toret = {}
    for func in f.getFunctions():
        label = func.getLabel()
        # A leading '_' also covers the '__' prefix, so one test is enough.
        if label[0:1] == '_' or label in ['call_gmon_start', 'frame_dummy']:
            continue

        loop_bounds = find_loopbounds(func)
        for ins in func.getInstructions():
            if arch.get_ins_type(ins.getOpcode()) not in memory_ins_types:
                continue

            # NOTE(review): [:-1] presumably strips the 'L' suffix that
            # hex() appends to Python 2 longs - confirm that addresses
            # are always longs.
            key_prefix = hex(ins.address)[:-1] + "_"

            if len(loop_bounds.values()) == 0:
                k0 = wali.getKey(key_prefix)
                print(key_prefix)
                transitions = answer.match(p, k0).asList()
                if len(transitions) == 0:
                    # No transition for this instruction: skip it instead
                    # of failing with an IndexError. The loop-bound
                    # branch below tolerates the same situation.
                    continue
                trans = transitions[0]
                weight = calling_contexts[label].extend(trans.weight())
                #find out address, by evaluating expression, given weight
                toret[ins.address] = memory_address_from_instruction(
                    f, calling_contexts, weight, ins)
            else:
                context_dict = {}
                #traverse the different loopbound contexts
                for lb in all_loopbound_combinations(loop_bounds.values()):
                    k0 = wali.getKey(key_prefix + "_".join(map(str, lb)))
                    transitions = answer.match(p, k0).asList()
                    assert len(transitions) in [0, 1]
                    if len(transitions) == 1:
                        trans = transitions[0]
                        weight = calling_contexts[label].extend(
                            trans.weight())
                        addrval = memory_address_from_instruction(
                            f, calling_contexts, weight, ins)
                        if addrval != 'INVALID_ADDRESS':
                            context_dict[lb] = addrval
                toret[ins.address] = context_dict
    return toret
def calculate_weight(f, calling_contexts, answer, ins): """Calculates the weight associated with this instruction, given the calling contexts and the answer. """ #the state of our automaton p = wali.getKey("p") func = ins.getFunction() loop_bounds = find_loopbounds(func) if len(loop_bounds.values()) == 0: k0 = wali.getKey(hex(ins.address)[:-1] + "_") #print hex(ins.address)[:-1] + "_" transitions = answer.match(p, k0).asList() if len(transitions) == 0: return None trans = transitions[0] #print "extending", calling_contexts[func.getLabel()], #print "with", trans.weight(), weight = calling_contexts[func.getLabel()].extend(trans.weight()) #Cast to ConstDom weight = dissy.constdom.toConstDom(weight) return weight else: context_dict = {} #traverse the different loopbound contexts #for lb in all_loopbound_combinations(loop_bounds.values()): for lb in loopcontexts_seen[func.getLabel()]: print 'lb', lb.values() k0 = wali.getKey(hex(ins.address)[:-1] + "_" + \ "_".join( map(str, lb.values()) )) transitions = answer.match(p, k0).asList() assert(len(transitions) in [0, 1]) if len(transitions) == 1: trans = transitions[0] #import pdb; pdb.set_trace() weight = calling_contexts[func.getLabel()].extend(trans.weight()) #print "extending", calling_contexts[func.getLabel()], "with", trans.weight(), #print "=", weight print hex(ins.address), lb, str(ins), weight #Cast to ConstDom weight = dissy.constdom.toConstDom(weight) context_dict[tuple(lb.values())] = weight return context_dict
def do_valueanalysis(fname):
    """Run the complete value analysis on the file named ``fname``.

    Every function of the file is parsed and linked, the WPDS is built,
    post* is computed from the ``f_main`` entry key and the calling
    contexts are derived from the answer.

    @returns the result of calculate_memaccesses for the file
    """
    #construct WPDS
    binary = dissy.File.File(fname)
    for function in binary.getFunctions():
        function.parse()
        function.link()
    wpds = construct_wpds(binary)

    #Query automaton with the single transition p --f_main--> accept
    start = wali.WFA()
    state = wali.getKey("p")
    final = wali.getKey("accept")
    entry = wali.getKey("f_main")
    start.addTrans(state, entry, final, getNoEffect())
    start.set_initial_state(state)
    start.add_final_state(final)

    #Compute the result
    reachable = wali.WFA()
    wpds.poststar(start, reachable)

    #post-process the result
    contexts = calculate_calling_contexts(binary, reachable)
    return calculate_memaccesses(binary, contexts, reachable)
def analyse(self):
    """Run the value analysis on ``self.filecontainer``.

    Builds the WPDS for the file, computes post* from the ``f_main``
    entry key and derives the calling contexts from the result.

    The post* automaton is stored in ``self.answer``, the calling
    contexts in ``self.calling_contexts``, and ``self.analysed`` is set
    to True at the end. Progress messages are written to stdout.
    """
    container = self.filecontainer

    # Functions that have no instructions yet are parsed and linked
    # before the WPDS is built.
    for function in container.getFunctions():
        if function.getInstructions() == []:
            function.parse()
            function.link()
    wpds = construct_wpds(container)
    print("Value Analysis: WPDS constructed")

    # Query automaton with the single transition p --f_main--> accept.
    # XXX configurable entrypoint
    start = wali.WFA()
    state = wali.getKey("p")
    final = wali.getKey("accept")
    entry = wali.getKey("f_main")
    start.addTrans(state, entry, final, getNoEffect())
    start.set_initial_state(state)
    start.add_final_state(final)

    self.answer = wali.WFA()
    wpds.poststar(start, self.answer)
    print("Value Analysis: post* calculated")

    # Post-process the result. calculate_memaccesses is not run here.
    self.calling_contexts = calculate_calling_contexts(container,
                                                       self.answer)
    print("Value Analysis: calling contexts calculated")

    self.analysed = True
def test_basic(self):
    """ConstDom renders its input in the ``{ ... ; }`` string form."""
    # (constructor argument, expected str() of the resulting ConstDom)
    cases = (
        ("", "{ }"),
        ("r1 = 5", "{ r1 = 5 ; }"),
        ("r2 = 7", "{ r2 = 7 ; }"),
        ("r1 = 5; r2 = 7", "{ r1 = 5 ; r2 = 7 ; }"),
        ("r2 = r2 + 5", "{ r2 = r2 + 5 ; }"),
    )
    for text, rendered in cases:
        self.assertEqual(str(ConstDom(text)), rendered)

    # The key for '*' is expected to be the epsilon key.
    self.assertEqual(wali.getKey('*'), wali.getEpsilonKey())
def no_test_funccall(self): """ n0: r1 = 30; n1: f(); n2: r1 = 5; n6: f(); n7: n3: f(): n4: dostuff n5: return """ w = wali.WPDS() p = wali.getKey("p") accept = wali.getKey("accept") n = [] for i in range(0, 8): n += [wali.getKey("n" + str(i))] z = wali.ConstDom().zero() noEffect = wali.SemElemPtr(wali.ConstDom("")) w.add_rule(p, n[0], p, n[1], wali.SemElemPtr(wali.ConstDom("r1 = 30"))) w.add_rule(p, n[1], p, n[3], n[2], noEffect) w.add_rule(p, n[2], p, n[6], wali.SemElemPtr(wali.ConstDom("r1 = 5"))) w.add_rule(p, n[6], p, n[3], n[7], noEffect) w.add_rule(p, n[3], p, n[4], wali.SemElemPtr(wali.ConstDom("r2 = 5"))) w.add_rule(p, n[4], p, n[5], noEffect) w.add_rule(p, n[5], p, noEffect) print w query = wali.WFA() #q = wali.getKey("q") #query.addTrans( p, n[3] , q, noEffect); #query.addTrans( q, n[2] , accept, noEffect); query.addTrans( p, n[0] , accept, noEffect); query.set_initial_state( p ) query.add_final_state( accept ) query.setQuery(wali.WFA.REVERSE) print query print "============== ANSWER ===============" answer = wali.WFA() w.poststar(query, answer) print str(answer) compkey = wali.getKey(p, n[3]) print wali.key2str(compkey) genkeysource = wali.GenKeySource(1, compkey) #print genkeysource.to_string() genkey = wali.getKey( genkeysource ) genkey = wali.getKey( genkeysource ) print str(genkey) #for i in answer.asList(): # print wali.getKeySource(i.fromState()).toString(), \ # "==", \ # wali.getKeySource(i.stack()).toString(), \ # "=>", \ # wali.getKeySource(i.toState()).toString(), \ # " ", \ # i.weight() # #import pdb; pdb.set_trace() abemad = answer.match(genkey, n[2]) #import pdb; pdb.set_trace() print abemad, dir(abemad) print mylist = abemad.asList() print len(mylist), mylist for i in mylist: print "abemad: ", i print "props: ", i.fromState(), i.stack(), i.toState(), i.weight(), i.getDelta() functionsummaries = answer.match(p, wali.getKey('*')).asList() functionsummary = functionsummaries[0] callsite = answer.match(p, n[5]).asList()[0] print 
callsite.weight(), functionsummary.weight()
def no_test_funccall(self): """ n0: r1 = 30; n1: f(); n2: r1 = 5; n6: f(); n7: n3: f(): n4: dostuff n5: return """ w = wali.WPDS() p = wali.getKey("p") accept = wali.getKey("accept") n = [] for i in range(0, 8): n += [wali.getKey("n" + str(i))] z = wali.ConstDom().zero() noEffect = wali.SemElemPtr(wali.ConstDom("")) w.add_rule(p, n[0], p, n[1], wali.SemElemPtr(wali.ConstDom("r1 = 30"))) w.add_rule(p, n[1], p, n[3], n[2], noEffect) w.add_rule(p, n[2], p, n[6], wali.SemElemPtr(wali.ConstDom("r1 = 5"))) w.add_rule(p, n[6], p, n[3], n[7], noEffect) w.add_rule(p, n[3], p, n[4], wali.SemElemPtr(wali.ConstDom("r2 = 5"))) w.add_rule(p, n[4], p, n[5], noEffect) w.add_rule(p, n[5], p, noEffect) print w query = wali.WFA() #q = wali.getKey("q") #query.addTrans( p, n[3] , q, noEffect); #query.addTrans( q, n[2] , accept, noEffect); query.addTrans(p, n[0], accept, noEffect) query.set_initial_state(p) query.add_final_state(accept) query.setQuery(wali.WFA.REVERSE) print query print "============== ANSWER ===============" answer = wali.WFA() w.poststar(query, answer) print str(answer) compkey = wali.getKey(p, n[3]) print wali.key2str(compkey) genkeysource = wali.GenKeySource(1, compkey) #print genkeysource.to_string() genkey = wali.getKey(genkeysource) genkey = wali.getKey(genkeysource) print str(genkey) #for i in answer.asList(): # print wali.getKeySource(i.fromState()).toString(), \ # "==", \ # wali.getKeySource(i.stack()).toString(), \ # "=>", \ # wali.getKeySource(i.toState()).toString(), \ # " ", \ # i.weight() # #import pdb; pdb.set_trace() abemad = answer.match(genkey, n[2]) #import pdb; pdb.set_trace() print abemad, dir(abemad) print mylist = abemad.asList() print len(mylist), mylist for i in mylist: print "abemad: ", i print "props: ", i.fromState(), i.stack(), i.toState(), i.weight( ), i.getDelta() functionsummaries = answer.match(p, wali.getKey('*')).asList() functionsummary = functionsummaries[0] callsite = answer.match(p, n[5]).asList()[0] print callsite.weight(), 
functionsummary.weight()
def calculate_calling_contexts(f, answer): """Calculates the combined calling context for each function @returns dict from function names -> calling context as SemElem""" #the state of our automaton p = wali.getKey("p") toret = {} #Traverse the CFG, from main, looking for function calls mainfunc = [func for func in f.getFunctions() if func.getLabel() == 'main'][0] toret[mainfunc.getLabel()] = getNoEffect() funcqueue = [mainfunc] while len(funcqueue) > 0: func = funcqueue.pop() #XXX #k0 = wali.getKey("f_" + func.getLabel()) #transitions = answer.match(p, k0).asList() #print len(transitions) #print transitions[0].weight() #import pdb; pdb.set_trace() #loop_bounds = find_loopbounds(func) print 'traversing func', func.getLabel() for ins in func.getInstructions(): if isinstance(ins.getOutLink(), Function): print 'now at', hex(ins.address) #import pdb; pdb.set_trace() f2 = ins.getOutLink() curcontext = toret.get(f2.getLabel(), None) #print "got", curcontext #traverse the different loopbound contexts #print loopcontexts_seen[func.getLabel()] #for lb in all_loopbound_combinations(loop_bounds.values()): for lb in loopcontexts_seen[func.getLabel()]: #print 'lb', lb.values() k0 = wali.getKey(hex(ins.address)[:-1] + "_" + \ "_".join( map(str, lb.values()) )) transitions = answer.match(p, k0).asList() if not len(transitions) in [0,1]: import pdb; pdb.set_trace() assert(len(transitions) in [0,1]) #If reachable in this loop context if len(transitions) == 1: trans = transitions[0] #print "extending", toret.get(func.getLabel()), "with", trans.weight() #if toret.get(func.getLabel()) is None: # import pdb; pdb.set_trace() thiscontext = toret.get(func.getLabel()).extend(trans.weight()) #print "combining", curcontext, "and", thiscontext if curcontext is None: curcontext = thiscontext else: curcontext = curcontext.combine(thiscontext) #print "combine result", curcontext #if curcontext is None: # import pdb; pdb.set_trace() toret[f2.getLabel()] = curcontext #dont calculate calling 
contexts for things called by library funcs if f2.getLabel()[:2] != '__': funcqueue += [ins.getOutLink()] return toret
def calculate_calling_contexts_test(f, answer):
    """Calculates the combined calling context for each function, walking
    each function's instruction list and following backward jumps until
    their loop bound is reached.

    @param f file container providing getFunctions()
    @param answer automaton in which the call-site transitions are
           looked up

    NOTE(review): the contexts are collected in a local dict, but the
    function as visible here has no return statement - confirm whether
    a final return of that dict is missing."""
    #the state of our automaton
    p = wali.getKey("p")

    toret = {}

    #Traverse the CFG, from main, looking for function calls
    mainfunc = [func for func in f.getFunctions() if func.getLabel() == 'main'][0]
    toret[mainfunc.getLabel()] = getNoEffect()
    funcqueue = [mainfunc]
    while len(funcqueue) > 0:
        func = funcqueue.pop()
        instructions = func.getInstructions()
        #Functions whose label starts with '__' are not walked
        if func.getLabel()[0:2] != '__':
            loop_bounds = find_loopbounds(func)
            #Copy loopbounds, with every counter starting at 0
            cur_loopbounds = dict(loop_bounds)
            for i in cur_loopbounds:
                cur_loopbounds[i] = 0

            #i is reused from here on as the index into instructions
            i = 0
            #indentlevel is only updated, never read, in this function
            indentlevel = -1
            while i < len(instructions):
                ins = instructions[i]
                #unconditional branch ("Function call")
                if isinstance(ins.getOutLink(), Function):
                    #TODO check that jump is unconditional
                    print 'now at', hex(ins.address)
                    f2 = ins.getOutLink()
                    curcontext = toret.get(f2.getLabel(), None)
                    #NOTE(review): iterating the dict yields its keys, so
                    #this joins the keys of cur_loopbounds rather than
                    #the current counter values - confirm this is meant
                    k0 = wali.getKey(hex(ins.address)[:-1] + "_" + \
                            "_".join( map(str, cur_loopbounds) ))
                    transitions = answer.match(p, k0).asList()
                    assert(len(transitions) in [0,1])
                    #If reachable in this loop context
                    if len(transitions) == 1:
                        trans = transitions[0]
                        #append the call-site weight to the caller's context
                        thiscontext = toret.get(func.getLabel()).extend(trans.weight())
                        print "combining", curcontext, "and", thiscontext
                        if curcontext is None:
                            curcontext = thiscontext
                        else:
                            curcontext = curcontext.combine(thiscontext)
                    toret[f2.getLabel()] = curcontext
                    funcqueue += [ins.getOutLink()]
                    #Special case: Function call as last instruction, no return from this call - handle as sort of sequential
                    if len(instructions) - 1 == i:
                        i += 1
                    else:
                        nextins = instructions[i+1]
                        i += 1
                    continue
                #jump to another instruction; 'bl', 'bx' and
                #one-character opcodes are excluded
                elif isinstance(ins.getOutLink(), Instruction) and \
                        not ins.getOpcode() in ['bl', 'bx'] and \
                        len(ins.getOpcode()) > 1:
                    #Forward jump
                    if ins.getOutLink().address > ins.address:
                        i += 1
                        continue
                    #Backward jump
                    elif ins.getOutLink().address < ins.address:
                        #examine loopbound, to find out if we unroll,
                        if cur_loopbounds[ins.address] < loop_bounds[ins.address]-1:
                            if cur_loopbounds[ins.address] == 0:
                                indentlevel += 1
                            #keep unrolling, take loop edge
                            nextins = ins.getOutLink()
                            i = instructions.index(nextins)
                            cur_loopbounds[ins.address] += 1
                            continue
                        else:
                            #loopbound reached, go on sequentially
                            nextins = instructions[i+1]
                            i += 1
                            #reset loop counter
                            cur_loopbounds[ins.address] = 0
                            indentlevel -= 1
                            continue
                    #NOTE(review): a jump whose target address equals its
                    #own address matches neither branch and leaves i
                    #unchanged - looks like it would loop forever; confirm
                #"Return"
                elif ins.getOpcode() == 'bx' and ins.getArgs() == 'lr':
                    #TODO arch specific to ARM
                    i += 1
                    continue
                #Sequential progression
                elif i < len(instructions)-1:
                    nextins = instructions[i+1]
                    i += 1
                else:
                    #last instruction of the function
                    i += 1
                    continue
def construct_wpds(f):
    """Builds the WPDS for all functions of the file container ``f``.

    Stack symbols are keys made of the instruction address and the
    current loop counters (via loopcontext_to_str); each function also
    gets an entry key "f_" + label. Backward jumps are followed until
    the loop bound reported by find_loopbounds is reached, which adds
    one set of rules per loop iteration.

    Functions whose label starts with '__' get no rules. As a side
    effect the global loopcontexts_seen is filled with, per function
    label, a list of copies of the loop counter dict as it changes.

    @param f file container providing getArch() and getFunctions()
    @returns the populated wali.WPDS
    """
    global loopcontexts_seen
    arch = f.getArch()

    wpds = wali.WPDS()
    #the state of our automaton
    p = wali.getKey("p")

    #Helper function, to wrap into the WALi framework, and cache effects
    #The cache is keyed on the (instruction, function) pair
    instructionEffectCache = {}
    def getInstructionEffect(ins, func):
        if (ins, func) in instructionEffectCache:
            return instructionEffectCache[(ins, func)]
        else:
            effect = wali.SemElemPtr(
                ConstDom(
                    arch.getInstructionEffect(ins, func),
                    arch.getInstructionStackEffect(ins, func)
                )
            )
            instructionEffectCache[(ins, func)] = effect
            return effect

    for func in f.getFunctions():
        instructions = func.getInstructions()
        if func.getLabel()[0:2] != '__':
            loop_bounds = find_loopbounds(func)
            #Copy loopbounds, with every counter starting at 0
            cur_loopbounds = dict(loop_bounds)
            for i in cur_loopbounds:
                cur_loopbounds[i] = 0
            loopcontexts_seen[func.getLabel()] = [dict(cur_loopbounds)]

            #Function start transition: entry key -> first instruction
            #NOTE(review): instructions[0] assumes the function has at
            #least one instruction - confirm
            ins = instructions[0]
            k0 = wali.getKey("f_" + func.getLabel())
            k1 = wali.getKey(hex(ins.address)[:-1] + "_" + \
                    loopcontext_to_str(cur_loopbounds))
            effect = getNoEffect()
            wpds.add_rule(p, k0, p, k1, effect)

            #i is reused from here on as the index into instructions
            i = 0
            #indentlevel is only updated, never read, in this function
            indentlevel = -1
            while i < len(instructions):
                ins = instructions[i]
                #unconditional branch ("Function call")
                if isinstance(ins.getOutLink(), Function):
                    #TODO check that jump is unconditional
                    k0 = wali.getKey(hex(ins.address)[:-1] + "_" + \
                            loopcontext_to_str(cur_loopbounds))
                    k1 = wali.getKey("f_" + ins.getOutLink().getLabel())
                    effect = getInstructionEffect(ins, func)
                    #Special case: Function call as last instruction, no return from this call - handle as sort of sequential
                    #(an earlier version tested len(instructions) - 1 == i)
                    if ins.opcode == 'b':
                        #plain 'b': rule into the callee without a
                        #return point
                        wpds.add_rule(p, k0, p, k1, effect)
                        i += 1
                    else:
                        #rule with the callee entry k1 and the key of
                        #the following instruction k2 as return point
                        nextins = instructions[i+1]
                        i += 1
                        k2 = wali.getKey(hex(nextins.address)[:-1] + "_" + \
                                loopcontext_to_str(cur_loopbounds))
                        wpds.add_rule(p, k0, p, k1, k2, effect)
                    continue
                #jump
                #TODO arch specific to ARM
                elif isinstance(ins.getOutLink(), Instruction) and \
                        not ins.getOpcode() in ['bl', 'bx'] and \
                        len(ins.getOpcode()) > 1:
                    #Forward jump
                    if ins.getOutLink().address > ins.address:
                        #Jump taken
                        nextins = ins.getOutLink()
                        k0 = wali.getKey(hex(ins.address)[:-1] + "_" + \
                                loopcontext_to_str(cur_loopbounds))
                        k1 = wali.getKey(hex(nextins.address)[:-1] + "_" + \
                                loopcontext_to_str(cur_loopbounds))
                        effect = getInstructionEffect(ins, func)
                        wpds.add_rule(p, k0, p, k1, effect)
                        #TODO arch specific to ARM
                        #NOTE(review): the enclosing elif already requires
                        #len(ins.getOpcode()) > 1, so this test is always
                        #true here
                        if len(ins.getOpcode()) > 1:
                            #Conditional jump, possibility of sequential
                            nextins = instructions[i+1]
                            k0 = wali.getKey(hex(ins.address)[:-1] + "_" + \
                                    loopcontext_to_str(cur_loopbounds))
                            k1 = wali.getKey(hex(nextins.address)[:-1] + "_" + \
                                    loopcontext_to_str(cur_loopbounds))
                            effect = getInstructionEffect(ins, func)
                            wpds.add_rule(p, k0, p, k1, effect)
                        i += 1
                        continue
                    #Backward jump
                    elif ins.getOutLink().address < ins.address:
                        #examine loopbound, to find out if we unroll,
                        if cur_loopbounds[ins.address] < loop_bounds[ins.address]-1:
                            if cur_loopbounds[ins.address] == 0:
                                indentlevel += 1
                            #keep unrolling, take loop edge
                            nextins = ins.getOutLink()
                            i = instructions.index(nextins)
                            #k0 uses the counters before the increment,
                            #k1 the counters after it
                            k0 = wali.getKey(hex(ins.address)[:-1] + "_" + \
                                    loopcontext_to_str(cur_loopbounds))
                            cur_loopbounds[ins.address] += 1
                            loopcontexts_seen[func.getLabel()] += [dict(cur_loopbounds)]
                            k1 = wali.getKey(hex(nextins.address)[:-1] + "_" + \
                                    loopcontext_to_str(cur_loopbounds))
                            effect = getInstructionEffect(ins, func)
                            wpds.add_rule(p, k0, p, k1, effect)
                            continue
                        else:
                            #loopbound reached, go on sequentially
                            nextins = instructions[i+1]
                            i += 1
                            #k0 uses the counters before the reset,
                            #k1 the counters after it
                            k0 = wali.getKey(hex(ins.address)[:-1] + "_" + \
                                    loopcontext_to_str(cur_loopbounds))
                            #reset loop counter
                            cur_loopbounds[ins.address] = 0
                            loopcontexts_seen[func.getLabel()] += [dict(cur_loopbounds)]
                            k1 = wali.getKey(hex(nextins.address)[:-1] + "_" + \
                                    loopcontext_to_str(cur_loopbounds))
                            effect = getInstructionEffect(ins, func)
                            wpds.add_rule(p, k0, p, k1, effect)
                            indentlevel -= 1
                            continue
                    else:
                        assert (False) #Jump to self is crazy
                #"Return"
                elif ins.getOpcode() == 'bx' and ins.getArgs() == 'lr':
                    #TODO arch specific to ARM
                    k0 = wali.getKey(hex(ins.address)[:-1] + "_" + \
                            loopcontext_to_str(cur_loopbounds))
                    effect = getInstructionEffect(ins, func)
                    #rule without a target stack symbol (presumably a
                    #WPDS pop rule - confirm against the wali binding)
                    wpds.add_rule(p, k0, p, effect)
                    i += 1
                    continue
                #Sequential progression
                elif i < len(instructions)-1:
                    nextins = instructions[i+1]
                    i += 1
                else:
                    #last instruction of the function: no rule is added
                    i += 1
                    continue

                #Only the sequential case reaches this point: rule from
                #the current instruction to the following one
                k0 = wali.getKey(hex(ins.address)[:-1] + "_" + \
                        loopcontext_to_str(cur_loopbounds))
                k1 = wali.getKey(hex(nextins.address)[:-1] + "_" + \
                        loopcontext_to_str(cur_loopbounds))
                effect = getInstructionEffect(ins, func)
                wpds.add_rule(p, k0, p, k1, effect)
    return wpds
func.link() res = construct_wpds(f) print "=================== WPDS constructed ==============" if verbose: #Sort output, by address ;-) lines = str(res).split("\n") lines.sort() verbosefile.write("=== WPDS ===\n") verbosefile.write('\n'.join(lines)) verbosefile.write("\n\n\n") #TEST query = wali.WFA() p = wali.getKey("p") accept = wali.getKey("accept") initloc = wali.getKey("f_main") query.addTrans( p, initloc , accept, getNoEffect() ); query.set_initial_state( p ) query.add_final_state( accept ) if verbose: print "============== ANSWER ===============" answer = wali.WFA() res.poststar(query, answer) if verbose: