예제 #1
0
파일: match.py 프로젝트: alexdu/ARM-console
def _address_row_headers(htm, side, lines):
    """Swap the row numbers HtmlDiff printed for one side with addresses.

    htm   -- table markup produced by difflib.HtmlDiff.make_table
    side  -- "from" or "to", the id prefix HtmlDiff gives that column
    lines -- raw disassembly lines; when the first tab-separated field of a
             line, minus its last character, parses as hexadecimal, that
             value replaces the (1-based) row number of the line.  Rows
             without a parsable address keep their number.

    Returns the markup with the matching header cells rewritten as "<hex>:".
    """
    addr_by_row = {}
    for row, line in enumerate(lines, 1):
        try:
            addr_by_row[str(row)] = int(line.split("\t")[0][:-1], 16)
        except ValueError:
            continue  # no address on this line

    def relabel(found):
        row = found.group(2)
        if row not in addr_by_row:
            return found.group(0)
        # The id is rebuilt from the captured pieces.  A "\1<digits>" style
        # template would not work here: the row digits would run into the
        # group number.
        return '<td class="diff_header" id="%s%s%s">%x:</td>' % (
            side, found.group(1), row, addr_by_row[row])

    # The back-reference requires the id to end in the very number shown in
    # the cell, so only genuine row-number cells are touched.
    header = r'<td class="diff_header" id="%s([0-9_]+?)([0-9]+)">\2</td>' % side
    return re.sub(header, relabel, htm)


def diff_side(dumpair, addrpair, score, sources, level=3):
    """Render an HTML side-by-side comparison of one matched address pair.

    dumpair  -- (d1, d2) dump objects
    addrpair -- (a1, a2) addresses, one in each dump
    score    -- match score, shown in the heading formatted as '{0:^+.2g}'
    sources  -- iterable of ((a1, a2), score) pairs; only used by the
                fallback below, where each one is rendered recursively
    level    -- HTML heading level of the title (<h3> by default)

    When d.Fun(a) succeeds for both sides, the result is a heading followed
    by a difflib.HtmlDiff table of the two disassemblies (tab-separated
    fields from the third onwards, cut to 60 characters), with the row
    numbers replaced by the addresses parsed from the listing.

    When either Fun call raises, the result is a heading built from
    disasm.guess_data followed by the renderings of `sources`, one heading
    level deeper, wrapped in <small>.

    Returns the HTML fragment as a string.
    """
    d1, d2 = dumpair
    a1, a2 = addrpair
    try:
        f1 = d1.Fun(a1)
        f2 = d2.Fun(a2)
    except Exception:
        # HTML entities, as in the function heading further down: they do
        # not depend on the encoding of this source file.
        sc = "&larr;(%s)&rarr;" % ('{0:^+.2g}'.format(score))
        head = "<h%d>%s in %s %s %s in %s</h%d>" % (
            level, disasm.guess_data(d1, a1), d1.bin, sc,
            disasm.guess_data(d2, a2), d2.bin, level)
        nested = "".join(
            diff_side(dumpair, srcpair, sco, [], level=level + 1)
            for srcpair, sco in sources)
        return head + "<small>" + nested + "</small>"

    # The second item of the capture result is used as the listing text.
    A = fileutil.capture(f1.disasm)[1].split("\n")
    B = fileutil.capture(f2.disasm)[1].split("\n")
    # Compare everything after the first two tab-separated columns only.
    a = ["\t".join(l.split("\t")[2:])[:60] for l in A]
    b = ["\t".join(l.split("\t")[2:])[:60] for l in B]

    sc = "&larr;[%s]&rarr;" % ('{0:^+.2g}'.format(score))
    head = "<h%d>%s in %s %s %s in %s</h%d>" % (
        level, f1.name, f1.dump.bin, sc, f2.name, f2.dump.bin, level)
    htm = head + difflib.HtmlDiff().make_table(a, b, f1.dump.bin, f2.dump.bin)

    htm = _address_row_headers(htm, "from", A)
    htm = _address_row_headers(htm, "to", B)
    return htm
예제 #2
0
파일: match.py 프로젝트: alexdu/ARM-console
def data_match_funcpair(dumpair, addrpair, log=True):
    """Score how closely the data references of two functions agree.

    dumpair  -- (d1, d2) dump objects; their FUNCS, REF2AS and A2REFS
                mappings are read here and they are passed to disasm helpers
    addrpair -- (a1, a2) address of the candidate function in each dump
    log      -- falsy: stay silent; an object with a write() method: log
                there; any other truthy value: log to sys.stdout

    References are paired offset by offset (every 4 bytes over
    FUNCS[a] - a, up to two per address).  When each function has exactly
    one entry in REF2AS, the functions containing those two referrers are
    paired as well.  Every pair falls into one bucket:

    * small numbers -- both values below 0x1000; counts as a hit when equal
    * strings       -- disasm.GuessString finds a string on at least one
                       side; scored by similarity, -1 when one side is None
    * big numbers   -- everything else; 1 when equal, 0.5 when the relative
                       difference is below 1

    Each bucket adds (ratio - 0.5) * sqrt(count) to the score (strings
    weighted 20x); twice the relative mismatch of the REF2AS counts and of
    the disasm.find_refs counts is subtracted.

    Returns (score, datamatches), where datamatches lists the unequal
    big-number pairs that earned the 0.5.

    Raises AssertionError when FUNCS[a] - a differs between the two sides.
    """
    # Duck-typed sink check; the Python 2-only `file` type test rejected
    # StringIO and other writer objects.
    matchlog = log if hasattr(log, "write") else sys.stdout
    verbose = bool(log)

    def say(text):
        # write() emits the same text as the print statement did and is
        # valid on both Python 2 and Python 3.
        matchlog.write("%s\n" % (text,))

    def mismatch(n1, n2):
        # Relative difference of two counts; 0 when both are 0.
        mean = (n1 + n2) / 2.0
        return abs(n1 - n2) / mean if mean else 0

    def is_small(value):
        # Python 2 orders None below every number and the comparison used
        # to rely on that.  Spelled out, a pair with one missing side is
        # bucketed exactly as before and does not raise on Python 3.
        return value is None or value < 0x1000

    d1, d2 = dumpair
    a1, a2 = addrpair
    e1 = d1.FUNCS[a1]
    e2 = d2.FUNCS[a2]

    ref1 = len(d1.REF2AS[a1])  # how many times it was referenced
    ref2 = len(d2.REF2AS[a2])
    reftomismatch = mismatch(ref1, ref2)
    if verbose:
        say("refto mismatch: %.2g (%.2g vs %d)" % (reftomismatch, ref1, ref2))
    ref1 = len(disasm.find_refs(d1, func=a1))
    ref2 = len(disasm.find_refs(d2, func=a2))
    reffrommismatch = mismatch(ref1, ref2)
    if verbose:
        say("reffrom mismatch: %.2g (%.2g vs %d)" % (reffrommismatch, ref1, ref2))

    assert e1 - a1 == e2 - a2

    smallmatch_ok = 0
    smallmatch_total = 0
    smallmatch = 0

    bigmatch_ok = 0
    bigmatch_total = 0
    bigmatch = 0

    datamatches = []

    stringmatch_ok = 0
    stringmatch_total = 0
    stringmatch = 0
    if verbose:
        say(funcpair((d1, d2), (a1, a2)))

    # refs from this function, matched line-by-line
    refpairs = []
    for off in range(0, e1 - a1, 4):
        for i in range(2):  # max 2 refs at a single address; first is raw value, second is pointer
            try:
                pair = (d1.A2REFS[a1 + off][i], d2.A2REFS[a2 + off][i])
            except LookupError:
                # one side has no i-th reference at this offset: no pair
                continue
            refpairs.append(pair)

    # if there is a single ref (call) to this function, consider it also
    # (i.e. the function which calls this should also match)
    callers1 = d1.REF2AS[a1]
    callers2 = d2.REF2AS[a2]
    if len(callers1) == 1 and len(callers2) == 1:
        c1 = disasm.which_func(d1, callers1[0])
        c2 = disasm.which_func(d2, callers2[0])
        refpairs.append((c1, c2))
        if verbose:
            say("unique ref: %s called from %s <---> %s called from %s"
                % (disasm.guess_data(d1, a1),
                   disasm.guess_data(d1, c1),
                   disasm.guess_data(d2, a2),
                   disasm.guess_data(d2, c2)))

    for c1, c2 in refpairs:
        if c1 is None and c2 is None:
            continue

        if is_small(c1) and is_small(c2):
            smallmatch_total += 1
            if c1 == c2:
                smallmatch_ok += 1
            continue

        s1 = disasm.GuessString(d1, c1)
        s2 = disasm.GuessString(d2, c2)

        if s1 or s2:
            stringmatch_total += 1
            ds = 0
            if s1 is None or s2 is None:
                ds = -1  # penalty if one has string and other doesn't
            else:
                ratio = difflib.SequenceMatcher(None, s1, s2).ratio()
                if ratio > 0.7:
                    ds = ratio
                if len(s1) <= 5:
                    ds /= 2.0  # penalty for small strings
            stringmatch_ok += ds
            if verbose:
                say("string pair [match=%.2g]: %s <---> %s" % (ds, repr(s1), repr(s2)))
            continue

        bigmatch_total += 1
        if c1 == c2:
            bigmatch_ok += 1
        elif c1 is not None and c2 is not None:
            cm = (c1 + c2) / 2.0
            # a zero mean used to raise ZeroDivisionError; such a pair now
            # simply earns nothing
            if cm and abs(c1 - c2) / cm < 1:
                bigmatch_ok += 0.5  # same order of magnitude
                datamatches.append((c1, c2))

    # float() forces true division: with two ints Python 2 would truncate
    # these ratios to whole numbers.
    if smallmatch_total:
        smallmatch = float(smallmatch_ok) / smallmatch_total
        if verbose:
            say("small numbers match: %.2g (%d / %d)" % (smallmatch, smallmatch_ok, smallmatch_total))

    if bigmatch_total:
        bigmatch = float(bigmatch_ok) / bigmatch_total
        if verbose:
            say("big numbers match: %.2g (%d / %d)" % (bigmatch, bigmatch_ok, bigmatch_total))

    if stringmatch_total:
        stringmatch = float(stringmatch_ok) / stringmatch_total
        if verbose:
            say("STRING MATCH: %.2g (%.2g / %d)" % (stringmatch, stringmatch_ok, stringmatch_total))

    score = (smallmatch - 0.5) * math.sqrt(smallmatch_total) + \
            (bigmatch - 0.5) * math.sqrt(bigmatch_total) + \
            20 * (stringmatch - 0.5) * math.sqrt(stringmatch_total) - \
            reftomismatch * 2 - \
            reffrommismatch * 2

    if verbose:
        say("score: %.3g\n" % score)
    return score, datamatches