def getDefs(self,pairs,indexes,uppdefindex=-1): if indexes==[]:#BUG:please check this situation return [] if uppdefindex==-1: uppdefindex=indexes[0]-1 defs=[] for index,v,left_propa,up,low in pairs[::-1]: if isinstance(self.l[index],LineOfCode) and index<=uppdefindex:continue #note that the index of downward tainting param pointer should be set to the first code line #Or it will be aborted as it matched the "=". if indexes[low-1]<=uppdefindex:continue for i in indexes[up:low][::-1]: print "getDefs():Checking Def:",self.l[i] access=v.accessStr() if re.search(access, self.l[i].codestr): maybe_def=True else: maybe_def=False pointerstr=v.pointerStr() if pointerstr is not None: if re.search(pointerstr, self.l[i].codestr): maybe_def=True if maybe_def: def_type=self.matchDefinitionType(i,v) #if def_type==Syntax.FOR or def_type==Syntax.NORMAL_ASSIGN or def_type==Syntax.OP_ASSIGN or def_type==Syntax.INC or def_type==Syntax.RAW_DEF or def_type==Syntax.SYS_LIB_DEF:#ASSIGN if def_type!=Syntax.NODEF: defs.append((i,v)) print "Find the 100% definition." break elif self.likeArgDef(v,self.l[i].codestr): print "Check Possible Definitions:",self.l[i] if isinstance(self.l[i+1],FunctionCallInfo): if Syntax.isPossibleArgumentDefinition(self.l[i],v): defs.append((i,v)) print "Yes,it is Possible Definitions." print "But just possible,maybe 10%. We should continue search at least another 100% definition for assurance." elif self.likeArgDef(v,self.l[i].codestr): print "Check Possible Definitions:",self.l[i] if isinstance(self.l[i+1],FunctionCallInfo): if Syntax.isPossibleArgumentDefinition(self.l[i],v): defs.append((i,v)) print "Yes,it is Possible Definitions." print "But just possible,maybe 10%. We should continue search at least another 100% definition for assurance." defs.sort(key=lambda x:x[0],reverse=True)#index reversed order return defs
def checkArgDef(self,callsiteIndex,beginIndex,lowerBound,p,rfl,childnum,callee): if p==[] or isinstance(self.l[callsiteIndex+1],LineOfCode):#Abort non-pointer variable. return [],False #Note: funciton name and callee name may not be equal as there exist macro and function pointer #e.g. nread = abfd->iovec->bread (abfd, ptr, size); indexes=self.slice_same_func_lines(callsiteIndex+2,lowerBound)#PlUS TWO("callsiteIndex+2")means #start from the first line of callee function. params=self.l[callsiteIndex+1].get_param_list().split(",") if len(params)-1<childnum: skip_va_arg_nums=childnum-len(params) res=self.check_va_arg_style(skip_va_arg_nums,indexes) if not res: print "BAD arg-->param number match!" print 1/0 varname,indexes=res var= TaintVar(varname,p,rfl) else: #FIX ME following part should change for va_arg case #----------------------------------------------------------------------------------------# varname=params[int(childnum)].split("=")[0] #handle "=" cases like: #args_callback_command (name=0xbfffeb26 "swfdump0.9.2log/exploit_0_0", val=val@entry=0x0) at swfdump.c:200 print self.l[callsiteIndex+1] var=TaintVar(varname,p,rfl) #---------------------------------------------------------------------------------------# pairs=self.findAllReferences(var,indexes,True) pairs.append((callsiteIndex+1,var,True,0,len(indexes))) defs=self.getDefs(pairs,indexes) for d,v in defs: print "%%%",self.l[d] for d,v in defs: #BUG def_type=self.matchDefinitionType(d,v) if def_type==Syntax.FOR: self.TG.linkCrossEdges(beginIndex,d,v.simple_access_str()) jobs=Syntax.generate_for_jobs(d, self.l[d].codestr, v) return self.taintUp(jobs),True if def_type==Syntax.INC:#INC self.TG.linkCrossEdges(beginIndex,d,v.simple_access_str()) jobs.append(TaintJob(d,v)) jobs=list(set(jobs)) return self.taintUp(jobs),True elif def_type==Syntax.RAW_DEF:#RAW_DEF self.TG.linkCrossEdges(beginIndex,d,v.simple_access_str()) return [],True elif def_type==Syntax.NORMAL_ASSIGN: 
self.TG.linkCrossEdges(beginIndex,d,v.simple_access_str()) assign_handler=AssignmentHandler(self.l,self.TG) jobs=assign_handler.getJobs(v,d,indexes) return self.taintUp(jobs),True elif def_type==Syntax.OP_ASSIGN: self.TG.linkCrossEdges(beginIndex,d,v.simple_access_str()) assign_handler=AssignmentHandler(self.l,self.TG) jobs=assign_handler.getJobs(v,d,indexes) jobs.append(TaintJob(d, v)) return self.taintUp(jobs),True elif def_type == Syntax.RETURN_VALUE_ASSIGN: self.TG.linkCrossEdges(beginIndex,d,v.simple_access_str()) jobs=self.handleReturnAssignDirect(beginIndex,d,v) return jobs elif def_type==Syntax.SYS_LIB_DEF: self.TG.linkCrossEdges(beginIndex,d,v.simple_access_str()) jobs= Syntax.handle_sys_lib_def(d,v,self.l[d].codestr) return self.taintUp(jobs),True else: #job.traceIndex-->l.index(line) #f(t->q) variable:t syntax:*(t->q) #track the access variable t->q #truncate the outter syntax (->q,*) minus ( ->q)= (*) #use new syntax to checkArgDef----- var:t->q,syntax:* #---------------- result=Syntax.isPossibleArgumentDefinition(self.l[d],v) if result is not None: rfl,p,childnum,callee,arg=result jobs,b=self.checkArgDef(d,beginIndex,lowerBound,p,rfl,childnum,callee) if b: return self.taintUp(jobs),True return [],False
def findAllReferences(self, var, indexrange, left_propa):
    """Find variables that alias ``var`` through assignments in ``indexrange``.

    ``indexrange`` is a list of line indexes into ``self.l``; it is sorted
    IN PLACE.  The method runs a worklist over 5-tuples
    ``(index, var, left_propa, upperbound, lowerbound)`` where
    ``indexrange[upperbound:lowerbound]`` is the window still to be scanned
    for that variable.  Lines containing a lone ``=`` are checked:

    * if ``self.isLeftPropagate`` matches, a new ``TaintVar`` is built from
      the left-hand side and queued with a window starting after the line;
    * otherwise, if the pattern from ``Syntax.right_ref_propagate_pattern``
      matches, a new ``TaintVar`` is built from the right-hand side and
      queued with the current window.

    Returns a list of the queued 5-tuples sorted by their first element,
    or ``[]`` when ``indexrange`` is empty.

    NOTE(review): this source had lost its line structure; the nesting of
    a few statements (marked below) was reconstructed and must be checked
    against the upstream file.
    """
    visited=set()
    pairs=set()
    if indexrange==[]:return []
    indexrange.sort()
    # Seed: the variable itself over the whole range.
    V=set([(indexrange[0],var,left_propa,0,len(indexrange))])
    if left_propa:
        # Find the first line that assigns to var's pointer string (a single
        # "=", not "==") and cut the seed window there.
        for temp_lb in range(0,len(indexrange)):
            temp_index=indexrange[temp_lb]
            print var.pointerStr()
            print temp_index,self.l[temp_index]
            m=re.search(r'(?<![A-Za-z0-9_])'+var.pointerStr()+r"\s*=(?!=)",self.l[temp_index].codestr)
            if m:
                result=Syntax.isPossibleArgumentDefinition(self.l[temp_index],var)
                # Drop the trailing "=" from the match to get the left side.
                leftpart=m.group()[:-1].strip()
                rfl,pat=var.matchAccessPattern(leftpart)
                # Keep the assigning line inside the window only in these
                # two cases; otherwise the window ends just before it.
                if rfl>0 or result is not None:
                    lb=temp_lb+1
                else:
                    lb=temp_lb
                V=set([(indexrange[0],var,left_propa,0,lb)])
                break
    # NOTE(review): count is incremented below but never read.
    count=0
    while len(V)>0:
        # A collects the work items discovered in this round.
        A=set()
        for index,v,left_p,upperbound,lowerbound in V:
            #if not v.pointerStr():continue
            #lp=Syntax.left_ref_propagate_pattern(v)
            rp=Syntax.right_ref_propagate_pattern(v)
            print "Continue Check bellow the first found assignment:",self.l[index]
            for idx in range(upperbound,lowerbound):
                aIndex=indexrange[idx]
                if left_p and aIndex<index:
                    print "pass(accelerate)",v.simple_access_str()
                elif aIndex in visited:
                    print "pass(accelerate)",v.simple_access_str()
                elif re.search(r"[^=]=[^=]",self.l[aIndex].codestr) is None:
                    # No lone "=" on this line: nothing can propagate here.
                    print "pass",v.simple_access_str()
                    visited.add(aIndex)
                else:
                    print "Line Under Check:",self.l[aIndex]
                    # NOTE(review): looks like a leftover debugging probe.
                    if "&hdr;" in self.l[aIndex].codestr:
                        print "Find IT!"
                    match=self.isLeftPropagate(v,self.l[aIndex].codestr)
                    if match is not None:
                        m_left_propgate=match
                        print "find left propagate:",self.l[aIndex]
                        array=m_left_propgate.group().split("=")
                        # Last word of the left side, without leading "*".
                        leftpart=array[0].split()[-1].lstrip("*")
                        rightpart=array[1].strip()
                        rightvar=rightpart.rstrip(";").strip()
                        if rightvar[0]=="(":
                            # Find the ")" closing the leading "(" and keep
                            # only what follows it, with surrounding
                            # parentheses stripped.
                            stack=[]
                            i=1
                            while i<len(rightvar):
                                if rightvar[i]=="(":
                                    stack.append("(")
                                elif rightvar[i]==")":
                                    if len(stack)>0:
                                        stack.pop()
                                    else:
                                        rightvar=rightvar[i+1:].strip().lstrip("(").rstrip(")").strip()
                                        break
                                i+=1
                        rfl,pat=v.matchAccessPattern(rightvar)
                        # Precedence: A or (B and C).
                        if "*"==pat[-1] or "->" in pat[-1] and aIndex>index:
                            if rfl<=0:rfl=1
                            q=TaintVar(leftpart,pat,rfl,True)#Note that we should take ref_len in to consideration.
                            # Window for q: from the next line up to the
                            # first later line assigning to q's pointer
                            # string.
                            lb=lowerbound
                            if idx+1<lowerbound:
                                for temp_lb in range(idx+1,lowerbound):
                                    temp_index=indexrange[temp_lb]
                                    print v.pointerStr()
                                    print q.pointerStr()
                                    print temp_index,self.l[temp_index]
                                    if re.search(q.pointerStr()+r"\s*[^=]=[^=]",self.l[temp_index].codestr):
                                        result=Syntax.isPossibleArgumentDefinition(self.l[temp_index],q)
                                        if result is not None:
                                            lb=temp_lb+1
                                        else:
                                            lb=temp_lb
                                        break
                            pairs.add((aIndex,q,True,idx+1,lb))
                            A.add((aIndex,q,True,idx+1,lb))
                            # NOTE(review): nesting reconstructed - the line
                            # is marked visited only when a pair was added;
                            # confirm against the upstream file.
                            visited.add(aIndex)
                    elif rp:
                        print rp
                        m_right_propgate=re.search(rp,self.l[aIndex].codestr)
                        if m_right_propgate:
                            array=m_right_propgate.group().split("=")
                            leftpart=array[0].strip()
                            rightpart=array[1].strip()
                            rightvar=rightpart.rstrip(";").strip()
                            rfl,pat=v.matchAccessPattern(leftpart)
                            # BUG if look downward
                            if rfl==0:
                                print "HEY"
                            if left_p and rfl>0:#v is KILLED here! Skip the following index range, and inform other left propagation
                                # NOTE(review): this value is not read again
                                # before the break below.
                                lowerbound=indexrange.index(aIndex)
                                #Stop find other references
                                #Because it's killed here. LOWER statements that use it is meaningless
                                break
                            # Precedence: A or (B and C).
                            if "*"==pat[-1] or "->" in pat[-1] and aIndex>=index:
                                if rfl<=0:rfl=1
                                q=TaintVar(rightvar,pat,rfl,True)#Note that we should take ref_len in to consideration.
                                print aIndex,self.l[aIndex]
                                print q
                                print v
                                pairs.add((aIndex,q,False,upperbound,lowerbound))
                                A.add((aIndex,q,False,upperbound,lowerbound))
                                # NOTE(review): nesting reconstructed, see
                                # the matching note in the left branch.
                                visited.add(aIndex)
        count+=1
        V=A
    pairs=list(pairs)
    print "refrences list-------"
    for pair in pairs:
        print pair[0],pair[1],pair[2],pair[3],pair[4]
    # Ascending by line index (Python 2 cmp-style sort).
    pairs.sort(lambda x,y:cmp(x[0],y[0]))
    return pairs
def lastModification(self,job): if job.trace_index==13050:#1293: print "FInd you!" if job.trace_index==0: return [] if isinstance(self.l[job.trace_index], FunctionCallInfo): return None#The input should not be a job in FunctionCallInfo if job.trace_index==1:#begin if job.var.v in self.l[job.trace_index-1].param_list: self.TG.linkInnerEdges(job.trace_index,job.trace_index-1,job.var.simple_access_str()) return [] indexes=self.up_slice(job) if len(indexes)>0: pairs=self.findAllReferences(job.var,indexes,False) pairs.append((indexes[0]-1,job.var,False,0,len(indexes))) #(aIndex,q,True,idx+1,lb) defs=self.getDefs(pairs,indexes) for d,v in defs: print "In list definition:",d,self.l[d] for d,v in defs: def_type=self.matchDefinitionType(d,v) if def_type==Syntax.FOR: self.TG.linkInnerEdges(job.trace_index,d,v.simple_access_str()) jobs=Syntax.generate_for_jobs(d, self.l[d].codestr, v) return list(set(jobs)) if def_type==Syntax.INC:#INC self.TG.linkInnerEdges(job.trace_index,d,v.simple_access_str()) return [TaintJob(d,v)] elif def_type==Syntax.RAW_DEF:#RAW_DEF self.TG.linkInnerEdges(job.trace_index,d,v.simple_access_str()) return [] elif def_type==Syntax.NORMAL_ASSIGN: self.TG.linkInnerEdges(job.trace_index,d,v.simple_access_str()) assign_handler=AssignmentHandler(self.l,self.TG) jobs=assign_handler.getJobs(v,d,indexes) return jobs elif def_type==Syntax.OP_ASSIGN: self.TG.linkInnerEdges(job.trace_index,d,v.simple_access_str()) assign_handler=AssignmentHandler(self.l,self.TG) jobs=assign_handler.getJobs(v,d,indexes) jobs.append(TaintJob(d, v)) return jobs elif def_type == Syntax.RETURN_VALUE_ASSIGN: self.TG.linkInnerEdges(job.trace_index,d,v.simple_access_str()) jobs=self.handleReturnAssignDirect(job.trace_index,d,v) return jobs elif def_type==Syntax.SYS_LIB_DEF: self.TG.linkInnerEdges(job.trace_index,d,v.simple_access_str()) jobs= Syntax.handle_sys_lib_def(d,v,self.l[d].codestr) return list(set(jobs)) else: #job.traceIndex-->l.index(line) #f(t->q) variable:t syntax:*(t->q) 
#track the access variable t->q #truncate the outter syntax (->q,*) minus ( ->q)= (*) #use new syntax to checkArgDef----- var:t->q,syntax:* #---------------- result=Syntax.isPossibleArgumentDefinition(self.l[d],v) if result is not None: rfl,p,childnum,callee,arg=result if "->headindex" in p and "header_read"==callee: print callee jobs,b=self.checkArgDef(d,job.trace_index,job.trace_index,p,rfl,childnum,callee) if b: return jobs if len(indexes)>0: i=indexes[0]-1 else: i=job.trace_index-1 #l[i] must be an instance of FunctionCallInfo if i==0:#begin if job.var.v in self.l[i].param_list: self.TG.linkInnerEdges(job.trace_index,i,job.var.simple_access_str()) return [] elif self.l[i].get_func_name().split("::")[-1].strip() in self.l[i-1].codestr and self.l[i]==self.l[job.trace_index].get_func_call_info():#call point if job.var.v in self.l[i].param_list: self.TG.linkInnerEdges(job.trace_index,i,job.var.simple_access_str()) return [TaintJob(i,job.var)] return [] elif self.isMacroCall(i-1): if job.var.v in self.l[i].param_list: self.TG.linkInnerEdges(job.trace_index,i,job.var.simple_access_str()) return [TaintJob(i,job.var)] return [] return []