Beispiel #1
0
    def diff_ins(self, di):
        """Merge the instruction diff ``di`` into ``self.ins``.

        Walks ``self.bb1.ins`` by position.  For each position it appends to
        ``self.ins`` the added instruction registered there (if any), then
        either the removed instruction registered there or, when there is
        none, the original instruction.  Changes registered at positions past
        the end of ``self.bb1.ins`` are appended afterwards in ascending
        position order.  Every appended instruction receives a ``diff_tag``
        attribute taken from ``DIFF_INS_TAG`` ("ADD", "REMOVE" or "ORIG").

        :param di: object exposing ``add_ins`` and ``remove_ins``; each is an
            iterable of sequences whose item 0 is an integer position and
            item 2 the instruction.  If two entries of the same list share a
            position, the last one wins.
        """
        self.di = di

        # Index both change lists by position.
        off_add = {}
        off_rm = {}
        for entry in self.di.add_ins:
            off_add[entry[0]] = entry
        for entry in self.di.remove_ins:
            off_rm[entry[0]] = entry

        def emit(pending, pos, fmt, tag):
            # Append the change pending at ``pos`` (if any), tag it and
            # consume it; report whether something was appended.
            if pos not in pending:
                return False
            ins = pending[pos][2]
            debug(fmt % (pos, ins.get_name(), ins.get_operands()))
            self.ins.append(ins)
            setattr(ins, "diff_tag", DIFF_INS_TAG[tag])
            del pending[pos]
            return True

        nb = 0
        for orig in self.bb1.ins:
            emit(off_add, nb, "%d ADD %s %s", "ADD")
            # A removal at this position stands in for the original
            # instruction; otherwise the original is kept as-is.
            if not emit(off_rm, nb, "%d RM %s %s", "REMOVE"):
                self.ins.append(orig)
                debug("%d %s %s" % (nb, orig.get_name(), orig.get_operands()))
                setattr(orig, "diff_tag", DIFF_INS_TAG["ORIG"])
            nb += 1

        # Whatever is left sits past the end of bb1: emit it in ascending
        # position order, additions before removals at the same position.
        leftover = sorted(p for p in set(off_add) | set(off_rm) if p >= nb)
        for pos in leftover:
            emit(off_add, pos, "%d ADD %s %s", "ADD")
            emit(off_rm, pos, "%d RM %s %s", "REMOVE")
Beispiel #2
0
    def _init_similarity(self):
        """Classify the methods of both files as matched or modified.

        Methods of the second file whose hash also appears among the first
        file's hashes are recorded in ``self.filters[MATCHMETHODS]``; the
        others become comparison candidates.  Each method of the first file
        whose hash is absent from the second file's hashes is compared, via
        its ``similarity`` method, with every candidate and recorded in
        ``self.filters[DIFFMETHODS]``; the remaining ones are recorded in
        ``self.filters[MATCHMETHODS]``.
        """
        # Candidates: methods of the second file with no identical hash in
        # the first file.
        available_vm2_methods = []
        for k in self.filters[METHODS][self.vm2[0]]:
            if k.getsha256() not in self.filters[HASHSUM][self.vm1[0]]:
                available_vm2_methods.append(k)
            elif k not in self.filters[MATCHMETHODS]:
                # Identical method present in the first file.
                self.filters[MATCHMETHODS].append(k)

        # Check whether methods of the first file have been modified.
        for j in self.filters[METHODS][self.vm1[0]]:
            debug("SIM FOR %s %s %s" %
                  (j.m.get_class_name(), j.m.get_name(), j.m.get_descriptor()))

            if j.getsha256() in self.filters[HASHSUM][self.vm2[0]]:
                # Identical method present in the second file.
                if j not in self.filters[MATCHMETHODS]:
                    self.filters[MATCHMETHODS].append(j)
                continue

            for k in available_vm2_methods:
                j.similarity(k, self.filters[BASE][FILTER_SIM_METH])

            # Register the method once instead of re-scanning DIFFMETHODS for
            # every candidate.  As before, a method is only registered when
            # at least one candidate was compared.
            if available_vm2_methods and j not in self.filters[DIFFMETHODS]:
                self.filters[DIFFMETHODS].append(j)
Beispiel #3
0
    def _init_similarity(self):
        """Classify the methods of both files as matched or modified.

        Methods of the second file whose hash also appears among the first
        file's hashes are recorded in ``self.filters[MATCHMETHODS]``; the
        others become comparison candidates.  Each method of the first file
        whose hash is absent from the second file's hashes is compared, via
        its ``similarity`` method, with every candidate and recorded in
        ``self.filters[DIFFMETHODS]``; the remaining ones are recorded in
        ``self.filters[MATCHMETHODS]``.
        """
        # Candidates: methods of the second file with no identical hash in
        # the first file.
        available_vm2_methods = []
        for k in self.filters[METHODS][self.vm2[0]]:
            if k.getsha256() not in self.filters[HASHSUM][self.vm1[0]]:
                available_vm2_methods.append(k)
            elif k not in self.filters[MATCHMETHODS]:
                # Identical method present in the first file.
                self.filters[MATCHMETHODS].append(k)

        # Check whether methods of the first file have been modified.
        for j in self.filters[METHODS][self.vm1[0]]:
            debug("SIM FOR %s %s %s" % (j.m.get_class_name(), j.m.get_name(), j.m.get_descriptor()))

            if j.getsha256() in self.filters[HASHSUM][self.vm2[0]]:
                # Identical method present in the second file.
                if j not in self.filters[MATCHMETHODS]:
                    self.filters[MATCHMETHODS].append(j)
                continue

            for k in available_vm2_methods:
                j.similarity(k, self.filters[BASE][FILTER_SIM_METH])

            # Register the method once instead of re-scanning DIFFMETHODS for
            # every candidate.  As before, a method is only registered when
            # at least one candidate was compared.
            if available_vm2_methods and j not in self.filters[DIFFMETHODS]:
                self.filters[DIFFMETHODS].append(j)
Beispiel #4
0
    def diff_ins(self, di):
        """Merge the instruction diff ``di`` into ``self.ins``.

        Walks ``self.bb1.ins`` by position.  For each position it appends to
        ``self.ins`` the added instruction registered there (if any), then
        either the removed instruction registered there or, when there is
        none, the original instruction.  Changes registered at positions past
        the end of ``self.bb1.ins`` are appended afterwards in ascending
        position order.  Every appended instruction receives a ``diff_tag``
        attribute taken from ``DIFF_INS_TAG`` ("ADD", "REMOVE" or "ORIG").

        :param di: object exposing ``add_ins`` and ``remove_ins``; each is an
            iterable of sequences whose item 0 is an integer position and
            item 2 the instruction.  If two entries of the same list share a
            position, the last one wins.
        """
        self.di = di

        # Index both change lists by position.
        off_add = {}
        off_rm = {}
        for entry in self.di.add_ins:
            off_add[entry[0]] = entry
        for entry in self.di.remove_ins:
            off_rm[entry[0]] = entry

        def emit(pending, pos, fmt, tag):
            # Append the change pending at ``pos`` (if any), tag it and
            # consume it; report whether something was appended.
            if pos not in pending:
                return False
            ins = pending[pos][2]
            debug(fmt % (pos, ins.get_name(), ins.get_operands()))
            self.ins.append(ins)
            setattr(ins, "diff_tag", DIFF_INS_TAG[tag])
            del pending[pos]
            return True

        nb = 0
        for orig in self.bb1.ins:
            emit(off_add, nb, "%d ADD %s %s", "ADD")
            # A removal at this position stands in for the original
            # instruction; otherwise the original is kept as-is.
            if not emit(off_rm, nb, "%d RM %s %s", "REMOVE"):
                self.ins.append(orig)
                debug("%d %s %s" % (nb, orig.get_name(), orig.get_operands()))
                setattr(orig, "diff_tag", DIFF_INS_TAG["ORIG"])
            nb += 1

        # Whatever is left sits past the end of bb1: emit it in ascending
        # position order, additions before removals at the same position.
        leftover = sorted(p for p in set(off_add) | set(off_rm) if p >= nb)
        for pos in leftover:
            emit(off_add, pos, "%d ADD %s %s", "ADD")
            emit(off_rm, pos, "%d RM %s %s", "REMOVE")
Beispiel #5
0
    def set_childs(self, abb):
        """Rebuild ``self.childs`` from ``self.bb.childs``.

        A child entry whose third item has a ``name`` present in ``abb`` is
        replaced by a new 3-tuple whose third item is ``abb[name]``; every
        other entry is kept unchanged.

        :param abb: mapping from basic-block name to the block to link to
        """
        remapped = []
        for child in self.bb.childs:
            target = child[2]
            if target.name not in abb:
                debug("SET ORIG %s" % str(child))
                remapped.append(child)
                continue

            replacement = abb[target.name]
            debug("SET %s %s " % (target, replacement))
            remapped.append((child[0], child[1], replacement))

        self.childs = remapped
Beispiel #6
0
def filter_sort_meth_basic(x, value):
    """Return the lowest-scoring entry of ``x`` if its score is at most ``value``.

    :param x: mapping of method object -> score
    :param value: threshold the lowest score must not exceed
    :returns: a one-element list ``[(method, score)]`` holding the entry
        with the lowest score, or ``[]`` when ``x`` is empty or the lowest
        score is greater than ``value``
    """
    # Order by score first, falling back to the key on equal scores.
    z = sorted(x.items(), key=lambda item: (item[1], item[0]))

    if get_debug():
        for meth, score in z:
            debug("\t %s %s %s %d %f" % (meth.m.get_class_name(), meth.m.get_name(), meth.m.get_descriptor(), meth.m.get_length(), score))

    best = z[:1]
    # An empty mapping has no candidate at all: report "no match" instead of
    # failing on the missing first element.
    if not best or best[0][1] > value:
        return []

    return best
Beispiel #7
0
    def set_childs(self, abb):
        """Rebuild ``self.childs`` from ``self.bb.childs``.

        A child entry whose third item has a ``name`` present in ``abb`` is
        replaced by a new 3-tuple whose third item is ``abb[name]``; every
        other entry is kept unchanged.

        :param abb: mapping from basic-block name to the block to link to
        """
        remapped = []
        for child in self.bb.childs:
            target = child[2]
            if target.name not in abb:
                debug("SET ORIG %s" % str(child))
                remapped.append(child)
                continue

            replacement = abb[target.name]
            debug("SET %s %s " % (target, replacement))
            remapped.append((child[0], child[1], replacement))

        self.childs = remapped
Beispiel #8
0
    def _init_sort_methods(self):
        """Sort the candidates of each modified method; set aside the unmatched.

        Calls ``sort`` on every entry of ``self.filters[DIFFMETHODS]`` with
        the configured sort filter and threshold.  Entries for which ``sort``
        returns a value equal to ``False`` are moved from
        ``self.filters[DIFFMETHODS]`` to ``self.filters[DELETEMETHODS]``.
        """
        # Collect first: DIFFMETHODS must not shrink while it is iterated.
        delete_methods = []
        for j in self.filters[DIFFMETHODS]:
            debug("%s %s %s %d" % (j.m.get_class_name(), j.m.get_name(), j.m.get_descriptor(), j.m.get_length()))
            ret = j.sort(self.filters[BASE][FILTER_SORT_METH], self.filters[BASE][FILTER_SORT_VALUE])
            # Equality test kept on purpose: only a False-equal result marks
            # the method for removal (a None result, for instance, does not).
            if ret == False:
                delete_methods.append(j)

        for j in delete_methods:
            self.filters[DELETEMETHODS].append(j)
            self.filters[DIFFMETHODS].remove(j)
Beispiel #9
0
def filter_sort_meth_basic(x, value):
    """Return the lowest-scoring entry of ``x`` if its score is at most ``value``.

    :param x: mapping of method object -> score
    :param value: threshold the lowest score must not exceed
    :returns: a one-element list ``[(method, score)]`` holding the entry
        with the lowest score, or ``[]`` when ``x`` is empty or the lowest
        score is greater than ``value``
    """
    # Order by score first, falling back to the key on equal scores.
    z = sorted(x.items(), key=lambda item: (item[1], item[0]))

    if get_debug():
        for meth, score in z:
            debug("\t %s %s %s %d %f" %
                  (meth.m.get_class_name(), meth.m.get_name(),
                   meth.m.get_descriptor(), meth.m.get_length(), score))

    best = z[:1]
    # An empty mapping has no candidate at all: report "no match" instead of
    # failing on the missing first element.
    if not best or best[0][1] > value:
        return []

    return best
Beispiel #10
0
    def _init_sort_methods(self):
        """Sort the candidates of each modified method; set aside the unmatched.

        Calls ``sort`` on every entry of ``self.filters[DIFFMETHODS]`` with
        the configured sort filter and threshold.  Entries for which ``sort``
        returns a value equal to ``False`` are moved from
        ``self.filters[DIFFMETHODS]`` to ``self.filters[DELETEMETHODS]``.
        """
        # Collect first: DIFFMETHODS must not shrink while it is iterated.
        delete_methods = []
        for j in self.filters[DIFFMETHODS]:
            debug("%s %s %s %d" % (j.m.get_class_name(), j.m.get_name(),
                                   j.m.get_descriptor(), j.m.get_length()))
            ret = j.sort(self.filters[BASE][FILTER_SORT_METH],
                         self.filters[BASE][FILTER_SORT_VALUE])
            # Equality test kept on purpose: only a False-equal result marks
            # the method for removal (a None result, for instance, does not).
            if ret == False:
                delete_methods.append(j)

        for j in delete_methods:
            self.filters[DELETEMETHODS].append(j)
            self.filters[DIFFMETHODS].remove(j)
Beispiel #11
0
    def set_childs(self, abb):
        """Set this block's children and tag the closing instruction of bb2.

        ``self.childs`` is taken from ``self.bb1.childs``.  Every instruction
        of ``self.ins`` that compares equal to the last instruction of
        ``self.bb2`` receives a ``childs`` attribute built from
        ``self.bb2.childs``: an entry whose third item has a ``name`` present
        in ``abb`` is replaced by a 3-tuple pointing at ``abb[name]``, other
        entries are kept unchanged.

        :param abb: mapping from basic-block name to the block to link to
        """
        self.childs = self.bb1.childs

        for ins in self.ins:
            if not ins == self.bb2.ins[-1]:
                continue

            remapped = []
            for child in self.bb2.childs:
                target = child[2]
                if target.name not in abb:
                    debug("SET ORIG %s" % str(child))
                    remapped.append(child)
                    continue

                replacement = abb[target.name]
                debug("SET %s %s" % (target, replacement))
                remapped.append((child[0], child[1], replacement))

            ins.childs = remapped
Beispiel #12
0
    def set_childs(self, abb):
        """Set this block's children and tag the closing instruction of bb2.

        ``self.childs`` is taken from ``self.bb1.childs``.  Every instruction
        of ``self.ins`` that compares equal to the last instruction of
        ``self.bb2`` receives a ``childs`` attribute built from
        ``self.bb2.childs``: an entry whose third item has a ``name`` present
        in ``abb`` is replaced by a 3-tuple pointing at ``abb[name]``, other
        entries are kept unchanged.

        :param abb: mapping from basic-block name to the block to link to
        """
        self.childs = self.bb1.childs

        for ins in self.ins:
            if not ins == self.bb2.ins[-1]:
                continue

            remapped = []
            for child in self.bb2.childs:
                target = child[2]
                if target.name not in abb:
                    debug("SET ORIG %s" % str(child))
                    remapped.append(child)
                    continue

                replacement = abb[target.name]
                debug("SET %s %s" % (target, replacement))
                remapped.append((child[0], child[1], replacement))

            ins.childs = remapped
Beispiel #13
0
    def _init_mark_methods(self):
        """Append one mark per modified method, then one per matched method.

        For each entry of ``self.filters[DIFFMETHODS]`` the similarity with
        its first sorted match is recomputed through ``quick_similarity`` and
        passed through the configured mark filter.  For each entry of
        ``self.filters[MATCHMETHODS]`` the mark filter is applied to ``0.0``.
        All results are appended to ``self.diff_methods_marks``.
        """
        # Switch the compressor to get a better result for a one-to-one
        # comparison, so that the percentage is correct.
        self.sim.set_compress_type(XZ_COMPRESS)

        # Marks of the modified methods.
        for meth in self.filters[DIFFMETHODS]:
            debug("%s %s %s" % (meth.m.get_class_name(), meth.m.get_name(), meth.m.get_descriptor()))

            # First method of the sorted matches.
            best_match = meth.get_meth_first_sort()

            # Recompute the similarity to get a better percentage.
            score = meth.quick_similarity(best_match, self.filters[BASE][FILTER_SIM_METH])

            # Filter the mark to eliminate totally different methods.
            mark = self.filters[BASE][FILTER_MARK_METH](score)
            self.diff_methods_marks.append(mark)

        # Marks of the matched methods: one filtered 0.0 per method.
        for _ in self.filters[MATCHMETHODS]:
            mark = self.filters[BASE][FILTER_MARK_METH](0.0)
            self.diff_methods_marks.append(mark)
Beispiel #14
0
    def _init_mark_methods(self):
        """Append one mark per modified method, then one per matched method.

        For each entry of ``self.filters[DIFFMETHODS]`` the similarity with
        its first sorted match is recomputed through ``quick_similarity`` and
        passed through the configured mark filter.  For each entry of
        ``self.filters[MATCHMETHODS]`` the mark filter is applied to ``0.0``.
        All results are appended to ``self.diff_methods_marks``.
        """
        # Switch the compressor to get a better result for a one-to-one
        # comparison, so that the percentage is correct.
        self.sim.set_compress_type(XZ_COMPRESS)

        # Marks of the modified methods.
        for meth in self.filters[DIFFMETHODS]:
            debug("%s %s %s" %
                  (meth.m.get_class_name(), meth.m.get_name(),
                   meth.m.get_descriptor()))

            # First method of the sorted matches.
            best_match = meth.get_meth_first_sort()

            # Recompute the similarity to get a better percentage.
            score = meth.quick_similarity(best_match,
                                          self.filters[BASE][FILTER_SIM_METH])

            # Filter the mark to eliminate totally different methods.
            mark = self.filters[BASE][FILTER_MARK_METH](score)
            self.diff_methods_marks.append(mark)

        # Marks of the matched methods: one filtered 0.0 per method.
        for _ in self.filters[MATCHMETHODS]:
            mark = self.filters[BASE][FILTER_MARK_METH](0.0)
            self.diff_methods_marks.append(mark)
Beispiel #15
0
def getDiff(C, X, Y, i, j, a, r):
    """Backtrack through the table ``C`` to list additions and removals.

    Starting from cell ``(i, j)``, walks back toward ``(0, 0)`` and reports
    the steps in forward order: elements present only in ``Y`` are appended
    to ``a`` and elements present only in ``X`` are appended to ``r``.  Each
    step is also sent to ``debug`` as a two-digit hex code.

    The walk is iterative so that long sequences cannot exhaust the
    interpreter's recursion limit.

    :param C: two-dimensional table indexed as ``C[i][j]`` (presumably the
        LCS length table of ``X`` and ``Y`` -- confirm against the caller)
    :param X: first sequence of single characters
    :param Y: second sequence of single characters
    :param i: number of leading elements of ``X`` to consider
    :param j: number of leading elements of ``Y`` to consider
    :param a: output list; receives ``(index_in_Y, element)`` tuples
    :param r: output list; receives ``(index_in_X, element)`` tuples
    """
    KEEP, ADD, REMOVE = 0, 1, 2

    # Backward pass: record which move is taken from each cell.
    steps = []
    while True:
        if i > 0 and j > 0 and X[i - 1] == Y[j - 1]:
            i -= 1
            j -= 1
            steps.append((KEEP, i))
        elif j > 0 and (i == 0 or C[i][j - 1] >= C[i - 1][j]):
            j -= 1
            steps.append((ADD, j))
        elif i > 0 and (j == 0 or C[i][j - 1] < C[i - 1][j]):
            i -= 1
            steps.append((REMOVE, i))
        else:
            break

    # Forward pass: replay the moves from the start of the sequences.
    for kind, idx in reversed(steps):
        if kind == ADD:
            a.append((idx, Y[idx]))
            debug(" + " + "%02X" % ord(Y[idx]))
        elif kind == REMOVE:
            r.append((idx, X[idx]))
            debug(" - " + "%02X" % ord(X[idx]))
        else:
            debug(" " + "%02X" % ord(X[idx]))
Beispiel #16
0
def getDiff(C, X, Y, i, j, a, r):
    """Backtrack through the table ``C`` to list additions and removals.

    Starting from cell ``(i, j)``, walks back toward ``(0, 0)`` and reports
    the steps in forward order: elements present only in ``Y`` are appended
    to ``a`` and elements present only in ``X`` are appended to ``r``.  Each
    step is also sent to ``debug`` as a two-digit hex code.

    The walk is iterative so that long sequences cannot exhaust the
    interpreter's recursion limit.

    :param C: two-dimensional table indexed as ``C[i][j]`` (presumably the
        LCS length table of ``X`` and ``Y`` -- confirm against the caller)
    :param X: first sequence of single characters
    :param Y: second sequence of single characters
    :param i: number of leading elements of ``X`` to consider
    :param j: number of leading elements of ``Y`` to consider
    :param a: output list; receives ``(index_in_Y, element)`` tuples
    :param r: output list; receives ``(index_in_X, element)`` tuples
    """
    KEEP, ADD, REMOVE = 0, 1, 2

    # Backward pass: record which move is taken from each cell.
    steps = []
    while True:
        if i > 0 and j > 0 and X[i-1] == Y[j-1]:
            i -= 1
            j -= 1
            steps.append((KEEP, i))
        elif j > 0 and (i == 0 or C[i][j-1] >= C[i-1][j]):
            j -= 1
            steps.append((ADD, j))
        elif i > 0 and (j == 0 or C[i][j-1] < C[i-1][j]):
            i -= 1
            steps.append((REMOVE, i))
        else:
            break

    # Forward pass: replay the moves from the start of the sequences.
    for kind, idx in reversed(steps):
        if kind == ADD:
            a.append((idx, Y[idx]))
            debug(" + " + "%02X" % ord(Y[idx]))
        elif kind == REMOVE:
            r.append((idx, X[idx]))
            debug(" - " + "%02X" % ord(X[idx]))
        else:
            debug(" " + "%02X" % ord(X[idx]))
Beispiel #17
0
def filter_diff_ins_basic(dbb, sim):
    """Compute the instruction diff of ``dbb`` and pass it to ``dbb.diff_ins``.

    Both basic blocks of ``dbb`` are turned into sequences with ``toString``
    (sharing the same two dictionaries), compared through ``LCS`` and
    ``getDiff``, and the resulting positions are mapped back to instructions
    of ``dbb.bb2`` (additions) and ``dbb.bb1`` (removals).

    :param dbb: object exposing ``bb1``, ``bb2`` and ``diff_ins``
    :param sim: not used by this filter
    """
    # The same two dictionaries are handed to both conversions.
    h_state = {}
    r_state = {}
    seq1 = toString(dbb.bb1, h_state, r_state)
    seq2 = toString(dbb.bb2, h_state, r_state)

    debug("%s %d" % (repr(seq1), len(seq1)))
    debug("%s %d" % (repr(seq2), len(seq2)))

    table = LCS(seq1, seq2)
    added = []
    removed = []
    getDiff(table, seq1, seq2, len(seq1), len(seq2), added, removed)
    debug(added)
    debug(removed)

    # Additions refer to positions in bb2.
    final_add = []
    debug("DEBUG ADD")
    for entry in added:
        pos = entry[0]
        ins = dbb.bb2.ins[pos]
        debug(" \t %s %s %s" % (pos, ins.get_name(), ins.get_operands()))
        final_add.append((pos, 0, ins))

    # Removals refer to positions in bb1.
    final_rm = []
    debug("DEBUG REMOVE")
    for entry in removed:
        pos = entry[0]
        ins = dbb.bb1.ins[pos]
        debug(" \t %s %s %s" % (pos, ins.get_name(), ins.get_operands()))
        final_rm.append((pos, 0, ins))

    dbb.diff_ins(DiffINS(final_add, final_rm))
Beispiel #18
0
def filter_diff_ins_basic(dbb, sim):
    """Compute the instruction diff of ``dbb`` and pass it to ``dbb.diff_ins``.

    Both basic blocks of ``dbb`` are turned into sequences with ``toString``
    (sharing the same two dictionaries), compared through ``LCS`` and
    ``getDiff``, and the resulting positions are mapped back to instructions
    of ``dbb.bb2`` (additions) and ``dbb.bb1`` (removals).

    :param dbb: object exposing ``bb1``, ``bb2`` and ``diff_ins``
    :param sim: not used by this filter
    """
    # The same two dictionaries are handed to both conversions.
    h_state = {}
    r_state = {}
    seq1 = toString(dbb.bb1, h_state, r_state)
    seq2 = toString(dbb.bb2, h_state, r_state)

    debug("%s %d" % (repr(seq1), len(seq1)))
    debug("%s %d" % (repr(seq2), len(seq2)))

    table = LCS(seq1, seq2)
    added = []
    removed = []
    getDiff(table, seq1, seq2, len(seq1), len(seq2), added, removed)
    debug(added)
    debug(removed)

    # Additions refer to positions in bb2.
    final_add = []
    debug("DEBUG ADD")
    for entry in added:
        pos = entry[0]
        ins = dbb.bb2.ins[pos]
        debug(" \t %s %s %s" % (pos, ins.get_name(),
                                ins.get_operands()))
        final_add.append((pos, 0, ins))

    # Removals refer to positions in bb1.
    final_rm = []
    debug("DEBUG REMOVE")
    for entry in removed:
        pos = entry[0]
        ins = dbb.bb1.ins[pos]
        debug(" \t %s %s %s" % (pos, ins.get_name(),
                                ins.get_operands()))
        final_rm.append((pos, 0, ins))

    dbb.diff_ins(DiffINS(final_add, final_rm))
Beispiel #19
0
    def diff(self, func_sim_bb, func_diff_ins):
        """Diff the basic blocks of this method against its sorted match(es).

        When ``self.sort_h`` is empty, ``self.dbb`` and ``self.nbb`` are set
        to empty dicts and nothing else happens.  Otherwise every block of
        ``self.bb`` is scored against the blocks of each ``self.sort_h``
        entry, the outcome is stored in ``self.dbb`` (``DiffBB`` objects keyed
        by block name) and ``self.nbb`` (``NewBB`` objects keyed by basic
        block), and ``self.create_bbs()`` is called.

        :param func_sim_bb: called as ``func_sim_bb(block1, block2, self.sim)``;
            its result is used as a score, lower values sorting first and
            ``0.0`` being treated as "identical"
        :param func_diff_ins: called as ``func_diff_ins(diff_bb, self.sim)``
            for every ``DiffBB`` created here
        """
        # Nothing to compare against: publish empty results.
        if self.sort_h == [] :
            self.dbb = {}
            self.nbb = {}
            return

        bb1 = self.bb

        ### Dict for diff basic blocks
        ### While a block is being scored: bb1 block -> {bb2 block key: score}
        ### Afterwards: bb1 block -> (best bb2 block, score), identical blocks removed
        diff_bb = {}

        ### List of the bb2 block keys already paired with a bb1 block
        direct_diff_bb = []

        ### Dict for new basic blocks (bb2 blocks paired with nothing)
        new_bb = {}

        ### Reverse dict: bb2 block key -> bb1 basic block it was scored against
        associated_bb = {}

        for b1 in bb1 :
            diff_bb[ bb1[ b1 ] ] = {}

            debug("%s 0x%x" % (b1, bb1[ b1 ].basic_block.end))
            for i in self.sort_h :
                bb2 = i[0].bb
                # NOTE(review): diff_bb[bb1[b1]] is replaced by a tuple or
                # deleted at the end of each pass of this loop, so a second
                # entry in self.sort_h would not find the score dict here --
                # presumably sort_h holds a single entry; confirm.
                b_z = diff_bb[ bb1[ b1 ] ]

                bb2hash = i[0].bb_sha256

                # If the hash of b1 is known in bb2:
                # there can be one or more blocks identical to b1, record all
                # of them with a score of 0.0
                if bb1[ b1 ].get_hash() in bb2hash :
                    for equal_bb in bb2hash[ bb1[ b1 ].get_hash() ] :
                        b_z[ equal_bb.basic_block.name ] = 0.0

                # If the hash of b1 is not known in bb2:
                # score b1 against every block of bb2
                else :
                    for b2 in bb2 :
                        b_z[ b2 ] = func_sim_bb( bb1[ b1 ], bb2[ b2 ], self.sim )

                # Ascending by score, then by key on equal scores
                sorted_bb = sorted(b_z.iteritems(), key=lambda (k,v): (v,k))

                debug("\t\t%s" %  sorted_bb[:2])

                # Every scored candidate points back at b1 (a later b1
                # overwrites an earlier one); identical ones count as paired
                for new_diff in sorted_bb :
                    associated_bb[ new_diff[0] ] = bb1[ b1 ].basic_block

                    if new_diff[1] == 0.0 :
                        direct_diff_bb.append( new_diff[0] )

                # Best candidate is not identical: keep it as the diff of b1.
                # Otherwise b1 is unchanged and drops out of diff_bb.
                if sorted_bb[0][1] != 0.0 :
                    diff_bb[ bb1[ b1 ] ] = (bb2[ sorted_bb[0][0] ], sorted_bb[0][1])
                    direct_diff_bb.append( sorted_bb[0][0] )
                else :
                    del diff_bb[ bb1[ b1 ] ]

        # bb2 blocks that were paired with nothing are new
        for i in self.sort_h :
            bb2 = i[0].bb
            for b2 in bb2 :
                if b2 not in direct_diff_bb :
                    new_bb[ b2 ] = bb2[ b2 ]

        dbb = {}
        nbb = {}
        # Add all different basic blocks
        for d in diff_bb :
            dbb[ d.basic_block.name ] = DiffBB( d.basic_block, diff_bb[ d ][0].basic_block, diff_bb[ d ] )

        # Add all new basic blocks; a new block must not stay associated
        # with a bb1 block
        for n in new_bb :
            nbb[ new_bb[ n ].basic_block ] = NewBB( new_bb[ n ].basic_block )
            if n in associated_bb :
                del associated_bb[ n ]

        self.dbb = dbb
        self.nbb = nbb

        # Find the diff instructions
        for d in dbb :
            func_diff_ins( dbb[d], self.sim )

        # Set new childs for diff basic blocks
        # (set_childs receives the reverse dict built above)
        for d in dbb :
            dbb[ d ].set_childs( associated_bb )

        # Set new childs for new basic blocks
        for d in nbb :
            nbb[ d ].set_childs( associated_bb )

        # Create and tag all (orig/diff/new) basic blocks
        self.create_bbs()
Beispiel #20
0
    def diff(self, func_sim_bb, func_diff_ins):
        """Diff the basic blocks of this method against its sorted match(es).

        When ``self.sort_h`` is empty, ``self.dbb`` and ``self.nbb`` are set
        to empty dicts and nothing else happens.  Otherwise every block of
        ``self.bb`` is scored against the blocks of each ``self.sort_h``
        entry, the outcome is stored in ``self.dbb`` (``DiffBB`` objects keyed
        by block name) and ``self.nbb`` (``NewBB`` objects keyed by basic
        block), and ``self.create_bbs()`` is called.

        :param func_sim_bb: called as ``func_sim_bb(block1, block2, self.sim)``;
            its result is used as a score, lower values sorting first and
            ``0.0`` being treated as "identical"
        :param func_diff_ins: called as ``func_diff_ins(diff_bb, self.sim)``
            for every ``DiffBB`` created here
        """
        # Nothing to compare against: publish empty results.
        if self.sort_h == []:
            self.dbb = {}
            self.nbb = {}
            return

        bb1 = self.bb

        ### Dict for diff basic blocks
        ### While a block is being scored: bb1 block -> {bb2 block key: score}
        ### Afterwards: bb1 block -> (best bb2 block, score), identical blocks removed
        diff_bb = {}

        ### List of the bb2 block keys already paired with a bb1 block
        direct_diff_bb = []

        ### Dict for new basic blocks (bb2 blocks paired with nothing)
        new_bb = {}

        ### Reverse dict: bb2 block key -> bb1 basic block it was scored against
        associated_bb = {}

        for b1 in bb1:
            diff_bb[bb1[b1]] = {}

            debug("%s 0x%x" % (b1, bb1[b1].basic_block.end))
            for i in self.sort_h:
                bb2 = i[0].bb
                # NOTE(review): diff_bb[bb1[b1]] is replaced by a tuple or
                # deleted at the end of each pass of this loop, so a second
                # entry in self.sort_h would not find the score dict here --
                # presumably sort_h holds a single entry; confirm.
                b_z = diff_bb[bb1[b1]]

                bb2hash = i[0].bb_sha256

                # If the hash of b1 is known in bb2:
                # there can be one or more blocks identical to b1, record all
                # of them with a score of 0.0
                if bb1[b1].get_hash() in bb2hash:
                    for equal_bb in bb2hash[bb1[b1].get_hash()]:
                        b_z[equal_bb.basic_block.name] = 0.0

                # If the hash of b1 is not known in bb2:
                # score b1 against every block of bb2
                else:
                    for b2 in bb2:
                        b_z[b2] = func_sim_bb(bb1[b1], bb2[b2], self.sim)

                # Ascending by score, then by key on equal scores
                sorted_bb = sorted(b_z.iteritems(), key=lambda (k, v): (v, k))

                debug("\t\t%s" % sorted_bb[:2])

                # Every scored candidate points back at b1 (a later b1
                # overwrites an earlier one); identical ones count as paired
                for new_diff in sorted_bb:
                    associated_bb[new_diff[0]] = bb1[b1].basic_block

                    if new_diff[1] == 0.0:
                        direct_diff_bb.append(new_diff[0])

                # Best candidate is not identical: keep it as the diff of b1.
                # Otherwise b1 is unchanged and drops out of diff_bb.
                if sorted_bb[0][1] != 0.0:
                    diff_bb[bb1[b1]] = (bb2[sorted_bb[0][0]], sorted_bb[0][1])
                    direct_diff_bb.append(sorted_bb[0][0])
                else:
                    del diff_bb[bb1[b1]]

        # bb2 blocks that were paired with nothing are new
        for i in self.sort_h:
            bb2 = i[0].bb
            for b2 in bb2:
                if b2 not in direct_diff_bb:
                    new_bb[b2] = bb2[b2]

        dbb = {}
        nbb = {}
        # Add all different basic blocks
        for d in diff_bb:
            dbb[d.basic_block.name] = DiffBB(d.basic_block,
                                             diff_bb[d][0].basic_block,
                                             diff_bb[d])

        # Add all new basic blocks; a new block must not stay associated
        # with a bb1 block
        for n in new_bb:
            nbb[new_bb[n].basic_block] = NewBB(new_bb[n].basic_block)
            if n in associated_bb:
                del associated_bb[n]

        self.dbb = dbb
        self.nbb = nbb

        # Find the diff instructions
        for d in dbb:
            func_diff_ins(dbb[d], self.sim)

        # Set new childs for diff basic blocks
        # (set_childs receives the reverse dict built above)
        for d in dbb:
            dbb[d].set_childs(associated_bb)

        # Set new childs for new basic blocks
        for d in nbb:
            nbb[d].set_childs(associated_bb)

        # Create and tag all (orig/diff/new) basic blocks
        self.create_bbs()