def FuncItems(ea):
    """List the addresses covered by the function that contains `ea`.

    The function is located with ``disasm.which_func(_d, ea)``; the value
    stored for it in ``_d.FUNCS`` is used as the exclusive upper bound, and
    addresses are produced in steps of 4.

    >>> FuncItems(0xff000004)
    [4278190080L, 4278190084L, 4278190088L, 4278190092L]
    """
    func_start = disasm.which_func(_d, ea)
    func_end = _d.FUNCS[func_start]
    return range(func_start, func_end, 4)
def _relative_mismatch(n1, n2):
    """Return |n1 - n2| divided by the mean of n1 and n2, or 0 if the mean is 0."""
    mean = (n1 + n2) / 2.0
    return abs(n1 - n2) / mean if mean else 0


def _string_pair_score(s1, s2):
    """Score one pair of guessed strings.

    Returns -1 when either side is None, the SequenceMatcher ratio when it
    exceeds 0.7 (halved when s1 has 5 characters or fewer), and 0 otherwise.
    """
    if s1 is None or s2 is None:
        return -1  # penalty if one has string and other doesn't
    ratio = difflib.SequenceMatcher(None, s1, s2).ratio()
    if ratio <= 0.7:
        return 0
    if len(s1) <= 5:
        return ratio / 2  # penalty for small strings
    return ratio


def data_match_funcpair(dumpair, addrpair, log=True):
    """Score how well the data references of two functions agree.

    Parameters:
        dumpair:  pair ``(d1, d2)`` of dump objects; the attributes ``FUNCS``,
                  ``REF2AS`` and ``A2REFS`` are read from each of them.
        addrpair: pair ``(a1, a2)`` of addresses used as keys into ``FUNCS``
                  and ``REF2AS``; ``FUNCS[a]`` is treated as the end address.
        log:      a ``file`` object to write the report to; any other truthy
                  value sends the report to ``sys.stdout``; a falsy value
                  disables the report.

    Returns:
        ``(score, datamatches)`` where ``score`` is a float (higher means a
        better match) and ``datamatches`` is a list of ``(c1, c2)`` value
        pairs that differ but are of the same order of magnitude.

    Raises:
        AssertionError: if ``FUNCS[a1] - a1 != FUNCS[a2] - a2``.
    """
    matchlog = log if isinstance(log, file) else sys.stdout
    verbose = bool(log)

    d1, d2 = dumpair
    a1, a2 = addrpair
    e1 = d1.FUNCS[a1]
    e2 = d2.FUNCS[a2]

    # how many times each function was referenced
    ref1 = len(d1.REF2AS[a1])
    ref2 = len(d2.REF2AS[a2])
    reftomismatch = _relative_mismatch(ref1, ref2)
    if verbose:
        print >> matchlog, "refto mismatch: %.2g (%.2g vs %d)" % (reftomismatch, ref1, ref2)

    # number of refs reported by disasm.find_refs for each function
    ref1 = len(disasm.find_refs(d1, func=a1))
    ref2 = len(disasm.find_refs(d2, func=a2))
    reffrommismatch = _relative_mismatch(ref1, ref2)
    if verbose:
        print >> matchlog, "reffrom mismatch: %.2g (%.2g vs %d)" % (reffrommismatch, ref1, ref2)

    assert e1 - a1 == e2 - a2, "functions differ in length"

    smallmatch_ok = 0
    smallmatch_total = 0
    smallmatch = 0
    bigmatch_ok = 0
    bigmatch_total = 0
    bigmatch = 0
    datamatches = []
    stringmatch_ok = 0
    stringmatch_total = 0
    stringmatch = 0

    if verbose:
        print >> matchlog, funcpair((d1, d2), (a1, a2))

    # refs from this function, matched line-by-line
    refpairs = []
    for off in range(0, e1 - a1, 4):
        # max 2 refs at a single address; first is raw value, second is pointer
        for i in range(2):
            try:
                c1 = d1.A2REFS[a1 + off][i]
                c2 = d2.A2REFS[a2 + off][i]
            except (LookupError, TypeError):
                # one of the dumps has no i-th ref at this offset: skip the pair
                continue
            refpairs.append((c1, c2))

    # if there is a single ref (call) to this function, consider it also
    # (i.e. the function which calls this should also match)
    callers1 = d1.REF2AS[a1]
    callers2 = d2.REF2AS[a2]
    if len(callers1) == 1 and len(callers2) == 1:
        c1 = disasm.which_func(d1, callers1[0])
        c2 = disasm.which_func(d2, callers2[0])
        refpairs.append((c1, c2))
        if verbose:
            print >> matchlog, "unique ref: %s called from %s <---> %s called from %s" \
                % (disasm.guess_data(d1, a1), disasm.guess_data(d1, c1),
                   disasm.guess_data(d2, a2), disasm.guess_data(d2, c2))

    for c1, c2 in refpairs:
        if c1 is None and c2 is None:
            continue

        # Under Python 2 ordering None compares below any integer, so a pair
        # with a None side and a small value on the other lands here too.
        if c1 < 0x1000 and c2 < 0x1000:
            smallmatch_total += 1
            if c1 == c2:
                smallmatch_ok += 1
            continue

        s1 = disasm.GuessString(d1, c1)
        s2 = disasm.GuessString(d2, c2)
        if s1 or s2:
            stringmatch_total += 1
            ds = _string_pair_score(s1, s2)
            stringmatch_ok += ds
            if verbose:
                print >> matchlog, "string pair [match=%.2g]: %s <---> %s" % (ds, repr(s1), repr(s2))
            continue

        bigmatch_total += 1
        if c1 == c2:
            bigmatch_ok += 1
        elif c1 is not None and c2 is not None:
            cm = (c1 + c2) / 2
            # guard against a zero midpoint instead of raising ZeroDivisionError
            if cm and abs(c1 - c2) / cm < 1:
                bigmatch_ok += 0.5  # same order of magnitude
                # NOTE(review): the original indentation of this append was
                # lost; it is assumed to belong to this branch -- confirm.
                datamatches.append((c1, c2))

    # float() forces true division: with two ints Python 2 would floor the
    # ratio to 0 unless every pair matched.
    if smallmatch_total:
        smallmatch = float(smallmatch_ok) / smallmatch_total
        if verbose:
            print >> matchlog, "small numbers match: %.2g (%d / %d)" % (smallmatch, smallmatch_ok, smallmatch_total)

    if bigmatch_total:
        bigmatch = float(bigmatch_ok) / bigmatch_total
        if verbose:
            print >> matchlog, "big numbers match: %.2g (%d / %d)" % (bigmatch, bigmatch_ok, bigmatch_total)

    if stringmatch_total:
        stringmatch = float(stringmatch_ok) / stringmatch_total
        if verbose:
            print >> matchlog, "STRING MATCH: %.2g (%.2g / %d)" % (stringmatch, stringmatch_ok, stringmatch_total)

    # Each ratio is centred on 0.5 and weighted by the square root of its
    # sample count; strings weigh 20x; ref-count mismatches are subtracted.
    score = ((smallmatch - 0.5) * math.sqrt(smallmatch_total)
             + (bigmatch - 0.5) * math.sqrt(bigmatch_total)
             + 20 * (stringmatch - 0.5) * math.sqrt(stringmatch_total)
             - reftomismatch * 2
             - reffrommismatch * 2)
    if verbose:
        print >> matchlog, "score: %.3g\n" % score

    return score, datamatches