def test_sign_deterministic():
    """
    Signing identical input twice must yield identical signatures.
    """
    payload = b'3kl4jfklsdjfklasjf8934j9sjdf9adfkalsdjflkasjdflkasdf'

    # Two independent calls with the same bytes and permutation count:
    first = sign(payload, num_perms=20)
    second = sign(payload, num_perms=20)

    assert first == second
Beispiel #2
0
def test_sign_deterministic():
    """
    Check that sign is deterministic: the same bytes and num_perms give the
    same signature on every call.
    """
    payload = b'3kl4jfklsdjfklasjf8934j9sjdf9adfkalsdjflkasjdflkasdf'

    # Sign the very same payload twice and compare the two signatures.
    signatures = [sign(payload, num_perms=20) for _ in range(2)]
    assert signatures[0] == signatures[1]
Beispiel #3
0
def test_sign_similars():
    """
    Near-identical inputs should produce similar signatures, while very
    different inputs should produce signatures with zero similarity.
    """
    # Two strings that differ in a single byte ('2' versus '1'):
    near_a = sign(b'hello world he2llo world', 16)
    near_b = sign(b'hello world he1llo world', 16)
    assert calc_sim(near_a, near_b) > 6

    # Two strings that have nothing in common:
    far_a = sign(
        b'akjdflkasjflkasjlfkasjdflkjaslkdfjaslkjfsaklfdjaslkjdfsf', 16)
    far_b = sign(
        b'4039582903850923850928345982309589023845823458230945', 16)
    assert calc_sim(far_a, far_b) == 0
def test_sign_similars():
    """
    Similar strings are expected to get similar signatures; very different
    strings are expected to get signatures with no similarity at all.
    """
    # Inputs differing by one byte: similarity must exceed 6 (out of 16).
    close_sim = calc_sim(
        sign(b'hello world he2llo world', 16),
        sign(b'hello world he1llo world', 16))
    assert close_sim > 6

    # Unrelated inputs: similarity must be exactly zero.
    distant_sim = calc_sim(
        sign(b'akjdflkasjflkasjlfkasjdflkjaslkdfjaslkjfsaklfdjaslkjdfsf', 16),
        sign(b'4039582903850923850928345982309589023845823458230945', 16))
    assert distant_sim == 0
Beispiel #5
0
def test_short_input():
    """
    Check how sign and slow_sign behave around the minimum input length: a
    3 byte input must be rejected, a 4 byte input must be accepted.
    """
    too_short = b'123'
    just_enough = b'1234'

    # Below 4 bytes, both implementations should raise Catalog1Error:
    for signer in (sign, slow_sign):
        with pytest.raises(Catalog1Error):
            signer(too_short, 16)

    # With exactly 4 bytes, neither implementation should raise:
    for signer in (sign, slow_sign):
        signer(just_enough, 16)
Beispiel #6
0
def test_get_similars_basic(fdb_mem):
    """
    Exercise get_similars on the fdb_mem fixture: it should not crash, should
    return a list, and should honour the num_similars limit.
    """
    # Before this test adds anything, we only require a list to come back:
    initial = fdb_mem.get_similars(b'adfasdfasdf', 5)
    assert isinstance(initial, list)

    # After adding one function, asking for functions similar to it should
    # give exactly one result:
    fdb_mem.add_function('func_name', b'func_data', 'func_comment')
    one_match = fdb_mem.get_similars(b'func_data', num_similars=3)
    assert len(one_match) == 1

    # After adding a second function, asking for functions similar to the
    # second one should give both, with the second function listed first:
    fdb_mem.add_function('func_name2', b'func_data2', 'func_comment2')
    both = fdb_mem.get_similars(b'func_data2', num_similars=3)
    assert len(both) == 2
    top, runner_up = both

    # Only the first result carries the strong hash of the queried data:
    wanted_hash = strong_hash(b'func_data2')
    assert top.func_hash == wanted_hash
    assert runner_up.func_hash != wanted_hash

    # The first result's signature is the signature of the queried data:
    assert top.func_sig == sign(b'func_data2', NUM_HASHES)

    # Asking for a single similar returns a single result, even though the DB
    # now contains two functions:
    limited = fdb_mem.get_similars(b'func_data', num_similars=1)
    assert len(limited) == 1
Beispiel #7
0
    def add_function(self, func_name, func_data, func_comment):
        """
        Add a (Reversed) function to the database.

        One row is inserted into (or replaced in) the funcs table. The row
        holds the strong hash of func_data, the given name and comment, and
        one signature value per column c1..cN, where N is self._num_hashes.

        If sqlite3 reports an error, the current transaction is rolled back, a
        new one is begun, and the method returns normally (the error is not
        propagated).
        """
        self._check_is_open()
        cursor = self._conn.cursor()
        try:
            sig = sign(func_data, self._num_hashes)
            func_hash = strong_hash(func_data)

            # Signature column names, each with a leading comma: ",c1 ,c2 ..."
            sig_columns = ''.join(
                ',c' + str(col) + ' '
                for col in range(1, self._num_hashes + 1))
            # Three fixed placeholders plus one per signature column:
            placeholders = '?,?,?' + (',?' * self._num_hashes)

            cmd_insert = (
                'INSERT OR REPLACE into funcs \n'
                '                        (func_hash,func_name,func_comment'
                + sig_columns
                + ') values (' + placeholders + ');')

            row = [sqlite3.Binary(func_hash), func_name, func_comment] + sig
            cursor.execute(cmd_insert, row)

            # Commit functions inserted to the db once _funcs_pending exceeds
            # FUNCTION_BATCH.
            # NOTE(review): _funcs_pending is not updated in this method;
            # confirm it is maintained elsewhere in the class.
            if self._funcs_pending > FUNCTION_BATCH:
                self.commit_funcs()

        except sqlite3.Error:
            # Abandon the current transaction and open a fresh one.
            # NOTE(review): the error is swallowed here; confirm callers do
            # not need to be told that the insert failed.
            cursor.execute('ROLLBACK')
            cursor.execute('BEGIN TRANSACTION')
def test_short_input():
    """
    Feed sign and slow_sign inputs right at the length limit: fewer than 4
    bytes must fail with Catalog1Error, exactly 4 bytes must succeed.
    """
    num_perms = 16
    signers = (sign, slow_sign)

    # A 3 byte input should raise an error in each implementation:
    for do_sign in signers:
        with pytest.raises(Catalog1Error):
            do_sign(b'123', num_perms)

    # A 4 byte input will not raise an error:
    for do_sign in signers:
        do_sign(b'1234', num_perms)
Beispiel #9
0
    def add_function(self, func_name, func_data, func_comment):
        """
        Add a (Reversed) function to the database.

        Stores the strong hash of func_data, func_name, func_comment and the
        self._num_hashes signature values of func_data as one row of the funcs
        table, replacing a conflicting existing row (INSERT OR REPLACE).

        A sqlite3.Error raised along the way is handled by rolling back and
        beginning a new transaction; it is not re-raised.
        """
        self._check_is_open()
        cur = self._conn.cursor()
        try:
            signature = sign(func_data, self._num_hashes)
            func_hash = strong_hash(func_data)

            num_cols = self._num_hashes

            # Statement head: the three fixed columns.
            pieces = [
                'INSERT OR REPLACE into funcs \n'
                '                        (func_hash,func_name,func_comment']
            # One ",cK " entry per signature column (K counts from 1):
            pieces.extend(',c%d ' % (idx + 1) for idx in range(num_cols))
            # One placeholder per inserted column:
            pieces.append(') values (?,?,?' + (',?' * num_cols) + ');')
            cmd_insert = ''.join(pieces)

            values = [sqlite3.Binary(func_hash), func_name, func_comment]
            cur.execute(cmd_insert, values + signature)

            # Flush pending functions to the db when the pending counter has
            # grown past FUNCTION_BATCH.
            # NOTE(review): this method never changes _funcs_pending; verify
            # that another part of the class keeps it up to date.
            if self._funcs_pending > FUNCTION_BATCH:
                self.commit_funcs()

        except sqlite3.Error:
            # Give up the previous transaction, and start a new one.
            cur.execute('ROLLBACK')
            cur.execute('BEGIN TRANSACTION')
def test_sign_long_data():
    """
    Sign long data (an 11 byte pattern repeated 40 times) and check the shape
    of the resulting signature.
    """
    long_data = b'asdfklasjdf' * 40
    signature = sign(long_data, num_perms=20)

    # One entry per requested permutation, each of them a dword:
    assert len(signature) == 20
    assert all(isdword(entry) for entry in signature)
Beispiel #11
0
def test_sign_long_data():
    """
    Sign a long input and verify that the signature has num_perms entries,
    all of which pass isdword.
    """
    num_perms = 20
    repeated = b'asdfklasjdf' * 40

    sig = sign(repeated, num_perms=num_perms)
    assert len(sig) == num_perms
    for value in sig:
        assert isdword(value)
Beispiel #12
0
def test_sign_basic():
    """
    Sign a few strings with different num_perms values. Every signature must
    have num_perms entries, each of them a dword.
    """
    # (data to sign, number of permutations requested)
    cases = [
        (b'afdasdklfjaskljdfaklsjdf', 16),
        (b'3kl4jfklsdjfklasjf8934j9sjdf9adfkalsdjflkasjdflkasdf', 20),
        (b'4095809348529384523904582390485092384509283', 32),
    ]

    for data, count in cases:
        signature = sign(data, num_perms=count)
        assert len(signature) == count
        for entry in signature:
            assert isdword(entry)
def test_sign_basic():
    """
    Sign some strings and check the basic shape of each result: the length
    equals num_perms and every element is a dword.
    """
    def check_signature(data, num_perms):
        # Sign data and validate the length and element type of the result.
        sig = sign(data, num_perms=num_perms)
        assert len(sig) == num_perms
        assert all(isdword(value) for value in sig)

    check_signature(b'afdasdklfjaskljdfaklsjdf', 16)
    check_signature(
        b'3kl4jfklsdjfklasjf8934j9sjdf9adfkalsdjflkasjdflkasdf', 20)
    check_signature(b'4095809348529384523904582390485092384509283', 32)
def test_slow_matches_fast():
    """
    Make sure that the two implementations of catalog1 (The python and the C
    one) match: slow_sign and sign must return equal signatures for every
    sample input.
    """
    datas = [
        b'klsfjsalkdfjlksajfdlksaj340985390485ksldjflksdflksdjf',
        b'abc' * 205,
        b'349085092384590903485309485' * 300,
        # This sample used to be bound to a local named `data`, which the loop
        # variable below overwrote before it was ever used. It belongs in the
        # list so that it is actually compared.
        b'kslajflksajfaiosueroiqwuroiqwer9034851283904lkfjsalkfasdfsf',
    ]

    for data in datas:
        assert slow_sign(data, 4) == sign(data, 4)
Beispiel #15
0
    def test_match_special_func(self):
        """
        Look up similars for the special function's own data. We expect the
        special function to come back as the only result, with every field
        matching and a full grade.
        """
        wanted_data = self.special_func_data
        found = self.fdb_mem.get_similars(wanted_data, 5)

        # We assume that only the special function will return:
        assert len(found) == 1
        match = found[0]

        assert match.func_hash == strong_hash(wanted_data)
        assert match.func_name == self.special_func_name
        assert match.func_comment == self.special_func_comment
        assert match.func_sig == sign(wanted_data, NUM_HASHES)
        # Identical data: the grade equals NUM_HASHES.
        assert match.func_grade == NUM_HASHES
Beispiel #16
0
def test_slow_matches_fast():
    """
    Make sure that the two implementations of catalog1 (The python and the C
    one) match on a set of sample inputs.
    """
    # The last sample was previously assigned to a throwaway local that the
    # loop variable shadowed, so it was never compared. It is now part of the
    # samples.
    samples = (
        b'klsfjsalkdfjlksajfdlksaj340985390485ksldjflksdflksdjf',
        b'abc' * 205,
        b'349085092384590903485309485' * 300,
        b'kslajflksajfaiosueroiqwuroiqwer9034851283904lkfjsalkfasdfsf',
    )

    for sample in samples:
        # Both implementations are asked for 4 permutations.
        assert slow_sign(sample, 4) == sign(sample, 4)
Beispiel #17
0
    def test_match_like_special_func(self):
        """
        Look up similars for data that is just a bit different from the
        special function (change_random_byte applied 5 times). We expect to
        find the special function, with a grade below NUM_HASHES.
        """
        original_data = self.special_func_data

        # Change some bytes randomly:
        altered_data = original_data
        for _ in range(5):
            altered_data = change_random_byte(altered_data)

        found = self.fdb_mem.get_similars(altered_data, 5)
        # We assume that only the special function will return:
        assert len(found) == 1
        match = found[0]

        # The stored record must describe the unmodified special function:
        assert match.func_hash == strong_hash(original_data)
        assert match.func_name == self.special_func_name
        assert match.func_comment == self.special_func_comment
        assert match.func_sig == sign(original_data, NUM_HASHES)
        assert match.func_grade < NUM_HASHES
Beispiel #18
0
 def get_dist(i):
     """
     Return num_matches between the signature of result number i and the
     signature of f1. Both res and f1 are taken from the enclosing scope.
     """
     candidate_sig = res[i].func_sig
     reference_sig = sign(f1, NUM_HASHES)
     return num_matches(candidate_sig, reference_sig)
Beispiel #19
0
    def get_similars(self, func_data, num_similars):
        """
        Get a list of at most num_similars similar functions to a given
        function, as DBSimilar records.

        Candidates are the rows that share at least one signature column value
        with the signature of func_data, plus any row whose func_hash equals
        the strong hash of func_data. Every candidate is graded by the number
        of signature columns that match, and rows are fetched in descending
        grade order. A row with an exact hash match is moved to the front of
        the returned list.

        If sqlite3 reports an error, the transaction is rolled back, a new one
        is begun, and no list is returned (the call evaluates to None).
        """
        self._check_is_open()
        cursor = self._conn.cursor()
        try:
            sig = sign(func_data, self._num_hashes)
            func_hash = strong_hash(func_data)

            # Names of the signature columns: c1, c2, ..., cN.
            sig_cols = ['c' + str(n) for n in range(1, self._num_hashes + 1)]

            # One SELECT per signature column, plus one for an exact match by
            # strong hash; their UNION is the set of candidate rows.
            candidate_selects = [
                'SELECT * FROM funcs WHERE ' + col + '=?' for col in sig_cols]
            candidate_selects.append('SELECT * FROM funcs WHERE func_hash=?')
            candidates = "\nUNION\n".join(candidate_selects)

            # (c1=?) + (c2=?) + ... : the amount of matching signature entries,
            # which serves as the grade of a row.
            grade_expr = ' + '.join('(' + col + '=?)' for col in sig_cols)

            matching = (
                'SELECT func_hash,func_name,func_comment,'
                + ",".join(sig_cols)
                + ',(' + grade_expr + ') AS grade '
                + 'FROM (' + candidates + ') '
                + 'ORDER BY grade DESC LIMIT ?')

            # Placeholders appear in this order: grade expression, candidate
            # selects, strong hash, row limit.
            params = sig + sig + [func_hash, num_similars]
            cursor.execute(matching, params)

            res_list = []
            for row in cursor.fetchall():
                res_hash, res_name, res_comment = row[:3]
                similar = DBSimilar(
                    func_hash=res_hash,
                    func_name=res_name,
                    func_comment=res_comment,
                    # Everything between the fixed columns and the trailing
                    # grade column is the stored signature.
                    func_sig=list(row[3:-1]),
                    func_grade=row[-1])

                # An exact match (by strong hash) goes to the beginning of the
                # list; anything else is appended at the end.
                if res_hash != func_hash:
                    res_list.append(similar)
                else:
                    res_list.insert(0, similar)

            return res_list

        except sqlite3.Error:
            # Give up the previous transaction, and start a new one.
            # NOTE(review): nothing is returned on this path, so callers
            # receive None instead of a list; confirm this is intended.
            cursor.execute('ROLLBACK')
            cursor.execute('BEGIN TRANSACTION')
Beispiel #20
0
    def get_similars(self, func_data, num_similars):
        """
        Return up to num_similars DBSimilar records for the functions in the
        database that are most similar to func_data.

        A single query gathers every row that matches at least one entry of
        func_data's signature (or matches its strong hash exactly), grades
        each row by its number of matching signature entries, and keeps the
        num_similars rows with the highest grade. In the returned list an
        exact strong hash match always comes first.

        On sqlite3.Error the transaction is rolled back and a new one is
        begun; in that case the method returns None rather than a list.
        """
        self._check_is_open()
        cur = self._conn.cursor()
        try:
            signature = sign(func_data, self._num_hashes)
            func_hash = strong_hash(func_data)

            # Signature columns are numbered from 1: c1 .. cN.
            col_numbers = range(1, self._num_hashes + 1)

            # Potential candidates: any row sharing a signature column value.
            per_column = [
                'SELECT * FROM funcs WHERE c%d=?' % num for num in col_numbers]
            # Also search for an exact match, using the strong hash:
            by_hash = 'SELECT * FROM funcs WHERE func_hash=?'
            selects = "\nUNION\n".join(per_column + [by_hash])

            # Columns to fetch back: fixed fields, then the whole signature.
            sig_vals = ",".join('c%d' % num for num in col_numbers)
            # The grade of a row: (c1=sig[0]) + (c2=sig[1]) + ...
            sig_sum = ' + '.join('(c%d=?)' % num for num in col_numbers)

            query_parts = [
                'SELECT func_hash,func_name,func_comment,',
                sig_vals,
                ',(' + sig_sum + ') AS grade ',
                'FROM (' + selects + ') ',
                # Keep only the num_similars rows with the highest grade:
                'ORDER BY grade DESC LIMIT ?',
            ]
            matching = ''.join(query_parts)

            # The signature is bound twice: once for the grade expression and
            # once for the candidate selects.
            cur.execute(
                matching, signature + signature + [func_hash, num_similars])

            similars = []
            for row in cur.fetchall():
                row_hash = row[0]
                record = DBSimilar(
                    func_hash=row_hash,
                    func_name=row[1],
                    func_comment=row[2],
                    # The last column is the computed grade, not part of the
                    # signature, hence the -1.
                    func_sig=list(row[3:-1]),
                    func_grade=row[-1])

                # Exact matches are placed first, everything else keeps the
                # order in which the query returned it.
                if row_hash == func_hash:
                    similars.insert(0, record)
                else:
                    similars.append(record)

            return similars

        except sqlite3.Error:
            # Give up the previous transaction, and start a new one.
            # NOTE(review): falls through without a return value, so the
            # caller gets None; confirm callers handle that.
            cur.execute('ROLLBACK')
            cur.execute('BEGIN TRANSACTION')