def _relabel_diff_lines(htm, lines, side):
    """Show disassembly addresses instead of line numbers in one diff column.

    htm   -- HTML produced by difflib.HtmlDiff.make_table
    lines -- raw disassembly lines for that column; the first tab-separated
             field is parsed as a hex number after dropping its last
             character (presumably an "address:" label -- confirm against
             the disassembler output)
    side  -- id prefix difflib gives the column: "from" or "to"

    Lines whose first field does not parse as hex keep their line number.
    The cell ids are left untouched.
    """
    addr_by_lineno = {}
    for lineno, line in enumerate(lines, 1):
        try:
            addr_by_lineno[lineno] = int(line.split("\t")[0][:-1], 16)
        except ValueError:
            continue

    def relabel(match):
        cell_id, shown = match.group(1), match.group(2)
        addr = addr_by_lineno.get(int(shown))
        # Only touch cells whose id is "<non-empty prefix><line number>",
        # i.e. genuine line-number header cells for this column.
        if (addr is None or len(cell_id) <= len(shown)
                or not cell_id.endswith(shown)):
            return match.group(0)
        return '<td class="diff_header" id="%s%s">%x:</td>' % (side, cell_id, addr)

    # One pass over the document instead of one re.sub per disassembly line.
    pattern = r'<td class="diff_header" id="%s([0-9_]+)">([0-9]+)</td>' % side
    return re.sub(pattern, relabel, htm)


def diff_side(dumpair, addrpair, score, sources, level=3):
    """Return an HTML fragment comparing one matched address pair.

    dumpair  -- (d1, d2), the two dumps being compared
    addrpair -- (a1, a2), the matched addresses in d1 and d2
    score    -- match score, shown between the two names in the heading
    sources  -- iterable of (addrpair, score) pairs; only used when the
                addresses cannot be turned into functions, in which case
                each one is rendered recursively one heading level deeper
    level    -- HTML heading level for this section

    When both d1.Fun(a1) and d2.Fun(a2) succeed, the result is a heading
    followed by a difflib side-by-side table of the two disassemblies, with
    the line-number column replaced by addresses.  Otherwise the result is
    a heading built from disasm.guess_data plus the nested sources wrapped
    in <small>.
    """
    d1, d2 = dumpair
    a1, a2 = addrpair
    try:
        f1 = d1.Fun(a1)
        f2 = d2.Fun(a2)
    except Exception:
        # At least one side is not a function: describe the data instead
        # and list the pairs that led to this match.
        sc = "←(%s)→" % ('{0:^+.2g}'.format(score))
        head = "<h%d>%s in %s %s %s in %s</h%d>" % (
            level, disasm.guess_data(d1, a1), d1.bin, sc,
            disasm.guess_data(d2, a2), d2.bin, level)
        nested = [diff_side(dumpair, srcpair, sco, [], level=level + 1)
                  for srcpair, sco in sources]
        return head + "<small>" + "".join(nested) + "</small>"

    A = fileutil.capture(f1.disasm)[1].split("\n")
    B = fileutil.capture(f2.disasm)[1].split("\n")

    # Diff only the text from the third tab-separated field onwards, cut to
    # 60 characters; the first two fields are left out of the comparison.
    a = ["\t".join(l.split("\t")[2:])[:60] for l in A]
    b = ["\t".join(l.split("\t")[2:])[:60] for l in B]

    sc = "←[%s]→" % ('{0:^+.2g}'.format(score))
    head = "<h%d>%s in %s %s %s in %s</h%d>" % (
        level, f1.name, f1.dump.bin, sc, f2.name, f2.dump.bin, level)
    htm = head + difflib.HtmlDiff().make_table(a, b, f1.dump.bin, f2.dump.bin)

    htm = _relabel_diff_lines(htm, A, "from")
    return _relabel_diff_lines(htm, B, "to")
def _relative_mismatch(n1, n2):
    """Return |n1 - n2| divided by the mean of n1 and n2 (0 if the mean is 0)."""
    mean = (n1 + n2) / 2.0
    return abs(n1 - n2) / mean if mean else 0


def _is_small_value(value):
    """Tell whether a referenced value counts as a "small number" (< 0x1000).

    None is treated as small: under Python 2 ``None < 0x1000`` is true, and
    this keeps that ordering explicit instead of implicit.
    """
    return value is None or value < 0x1000


def data_match_funcpair(dumpair, addrpair, log=True):
    """Score how well two functions match, judging by the data they reference.

    dumpair  -- (d1, d2); each dump must provide FUNCS, REF2AS and A2REFS
    addrpair -- (a1, a2), keys into d1.FUNCS and d2.FUNCS
    log      -- falsy: stay quiet; an object with a write() method: log to
                it; any other truthy value: log to sys.stdout

    The references found at the same offset in both functions are paired
    up and classified as small numbers, strings or big numbers.  The score
    combines the three match ratios (strings weighted 20x) and subtracts a
    penalty for differing reference counts to and from the functions.

    Returns (score, datamatches), where datamatches lists the (c1, c2)
    pairs that were classified as big numbers.

    Raises AssertionError if FUNCS[a] - a differs between the two sides.
    """
    matchlog = log if hasattr(log, "write") else sys.stdout
    verbose = bool(log)

    d1, d2 = dumpair
    a1, a2 = addrpair
    e1 = d1.FUNCS[a1]
    e2 = d2.FUNCS[a2]

    # how many times each function was referenced
    ref1 = len(d1.REF2AS[a1])
    ref2 = len(d2.REF2AS[a2])
    reftomismatch = _relative_mismatch(ref1, ref2)
    if verbose:
        print >> matchlog, "refto mismatch: %.2g (%d vs %d)" % (reftomismatch, ref1, ref2)

    # how many references disasm.find_refs reports for each function
    ref1 = len(disasm.find_refs(d1, func=a1))
    ref2 = len(disasm.find_refs(d2, func=a2))
    reffrommismatch = _relative_mismatch(ref1, ref2)
    if verbose:
        print >> matchlog, "reffrom mismatch: %.2g (%d vs %d)" % (reffrommismatch, ref1, ref2)

    # The offset-by-offset pairing below relies on both sides spanning the
    # same range.
    assert e1-a1 == e2-a2

    smallmatch_ok = 0
    smallmatch_total = 0
    smallmatch = 0
    bigmatch_ok = 0
    bigmatch_total = 0
    bigmatch = 0
    datamatches = []
    stringmatch_ok = 0
    stringmatch_total = 0
    stringmatch = 0

    if verbose:
        print >> matchlog, funcpair((d1, d2), (a1, a2))

    # refs from this function, matched line-by-line
    refpairs = []
    for off in range(0, e1 - a1, 4):
        # max 2 refs at a single address; first is raw value, second is pointer
        for i in range(2):
            try:
                c1 = d1.A2REFS[a1 + off][i]
                c2 = d2.A2REFS[a2 + off][i]
            except (LookupError, TypeError):
                # no such ref on at least one side: nothing to pair up
                continue
            refpairs.append((c1, c2))

    # if there is a single ref (call) to this function, consider it also
    # (i.e. the function which calls this should also match)
    callers1 = d1.REF2AS[a1]
    callers2 = d2.REF2AS[a2]
    if len(callers1) == 1 and len(callers2) == 1:
        c1 = disasm.which_func(d1, callers1[0])
        c2 = disasm.which_func(d2, callers2[0])
        refpairs.append((c1, c2))
        if verbose:
            print >> matchlog, "unique ref: %s called from %s <---> %s called from %s" \
                % (disasm.guess_data(d1, a1), disasm.guess_data(d1, c1),
                   disasm.guess_data(d2, a2), disasm.guess_data(d2, c2))

    for c1, c2 in refpairs:
        if c1 is None and c2 is None:
            continue

        if _is_small_value(c1) and _is_small_value(c2):
            smallmatch_total += 1
            if c1 == c2:
                smallmatch_ok += 1
            continue

        s1 = disasm.GuessString(d1, c1)
        s2 = disasm.GuessString(d2, c2)
        if s1 or s2:
            stringmatch_total += 1
            ds = 0
            if s1 is None or s2 is None:
                ds = -1  # penalty if one has string and other doesn't
            else:
                ratio = difflib.SequenceMatcher(None, s1, s2).ratio()
                if ratio > 0.7:
                    ds = ratio
                    if len(s1) <= 5:
                        ds /= 2  # penalty for small strings
            stringmatch_ok += ds
            if verbose:
                print >> matchlog, "string pair [match=%.2g]: %s <---> %s" % (ds, repr(s1), repr(s2))
            continue

        bigmatch_total += 1
        if c1 == c2:
            bigmatch_ok += 1
        elif c1 is not None and c2 is not None:
            # Float mean: avoids truncation, and a zero mean is skipped
            # rather than dividing by it.
            mean = (c1 + c2) / 2.0
            if mean and abs(c1 - c2) / mean < 1:
                bigmatch_ok += 0.5  # same order of magnitude
        # NOTE(review): every big-number pair is recorded, matched or not --
        # confirm this is what the callers of datamatches expect.
        datamatches.append((c1, c2))

    # float() keeps these ratios fractional even when both operands are ints
    # (plain "/" floors int operands under Python 2).
    if smallmatch_total:
        smallmatch = float(smallmatch_ok) / smallmatch_total
    if verbose:
        print >> matchlog, "small numbers match: %.2g (%d / %d)" % (smallmatch, smallmatch_ok, smallmatch_total)

    if bigmatch_total:
        bigmatch = float(bigmatch_ok) / bigmatch_total
    if verbose:
        print >> matchlog, "big numbers match: %.2g (%d / %d)" % (bigmatch, bigmatch_ok, bigmatch_total)

    if stringmatch_total:
        stringmatch = float(stringmatch_ok) / stringmatch_total
    if verbose:
        print >> matchlog, "STRING MATCH: %.2g (%.2g / %d)" % (stringmatch, stringmatch_ok, stringmatch_total)

    score = (smallmatch - 0.5) * math.sqrt(smallmatch_total) + \
            (bigmatch - 0.5) * math.sqrt(bigmatch_total) + \
            20 * (stringmatch - 0.5) * math.sqrt(stringmatch_total) - \
            reftomismatch * 2 - \
            reffrommismatch * 2

    if verbose:
        print >> matchlog, "score: %.3g\n" % score
    return score, datamatches