Exemple #1
0
def func_similarity_astLevel(db1, funcs, db2, func_name, suffix_tree_obj, worksheet):
    """Search candidate functions in db1 for the AST pattern of a target function.

    The target function ``func_name`` is looked up in ``db2`` and serialized
    into four patterns.  Every function of ``funcs`` that survives
    ``filter_functions`` (filtered by the target's return type and parameter
    list) is serialized in four variants and searched for the matching
    pattern.  A row is appended to ``worksheet`` for every candidate on which
    at least one variant matches.

    Args:
        db1: database holding the functions to be compared.
        funcs: candidate functions from ``db1``.
        db2: vulnerability-feature database holding the target function.
        func_name: name of the target function in ``db2``.
        suffix_tree_obj: object exposing ``search(text, pattern)``.
        worksheet: object exposing ``append(row)``; receives result tuples
            ``(func_name, file, candidate_name, r1, r2, r3, r4)``.
    """
    target_func = get_function_ast_root(db2, func_name)
    if target_func is None:
        # Nothing to compare against; mirror the sibling routines and bail out.
        print("%s is not found" % func_name)
        return

    return_type = get_function_return_type(db2, target_func)
    param_list = get_function_param_list(db2, target_func)

    # Only compare against functions compatible with the target's signature.
    filter_funcs = filter_functions(db1, funcs, return_type, param_list)

    # [2:] drops the two leading entries of each serialized sequence
    # (FunctionDef / CompoundStatement according to sibling routines).
    ret = serializedAST(db2).genSerilizedAST(target_func)
    patterns = [";".join(ret[idx][2:]) for idx in range(4)]

    # Report key paired with the two serializedAST constructor flags used for
    # the candidate side.
    # NOTE(review): flags presumably mean (keep types, keep constants) -- confirm.
    # NOTE(review): the candidate side uses the "[0][:-1]" string form while the
    # pattern side joins token lists; verify both match one serializedAST API.
    variants = (
        ('distinct_type_and_const', True, True),
        ('distinct_const_no_type', False, True),
        ('distinct_type_no_const', True, False),
        ('no_type_no_const', False, False),
    )

    for func in filter_funcs:
        candidate_name = func.properties[u'name']
        ast_root = get_function_ast_root(db1, candidate_name)

        subjects = [
            serializedAST(db1, flag_a, flag_b).genSerilizedAST(ast_root)[0][:-1]
            for _, flag_a, flag_b in variants
        ]

        # Every key is always populated, so the checks below cannot raise
        # KeyError when a variant does not match.
        report = {}
        for (key, _, _), subject, pattern in zip(variants, subjects, patterns):
            report[key] = bool(suffix_tree_obj.search(subject, pattern))

        if any(report[key] for key, _, _ in variants):
            func_file = get_function_file(db1, candidate_name)
            worksheet.append(
                (func_name, func_file, candidate_name,
                 report['distinct_type_and_const'],
                 report['distinct_const_no_type'],
                 report['distinct_type_no_const'],
                 report['no_type_no_const']))
Exemple #2
0
def get_type_mapping_table(neo4j_db, func_name):
    """Return the variable map produced by serializing ``func_name``'s AST.

    The AST root is looked up in ``neo4j_db``.  When it is found, the function
    is serialized and the serializer's ``variable_maps`` attribute is returned.
    When it is not found, a message is printed and the fallback mapping
    ``{'other': 'v'}`` is returned.
    """
    root = get_function_ast_root(neo4j_db, func_name)
    if root is not None:
        serializer = serializedAST(neo4j_db)
        # Serialization is run for its side effect of filling variable_maps.
        serializer.genSerilizedAST(root)
        return serializer.variable_maps

    # The message reads "node does not exist".
    print(u"节点不存在")
    return {'other': 'v'}
Exemple #3
0
def get_software_var_map(soft, port):
    """Serialize every function of a Neo4j graph into a per-software SQLite table.

    Connects to the Neo4j instance on ``127.0.0.1:<port>`` and to the SQLite
    file ``/home/bert/Documents/data/<soft>.db``, creates table ``<soft>`` if
    needed, and stores one row per function: id, name, file, variable map and
    the five serialized AST sequences (joined with ``;``).  Functions whose id
    is already present in the table are skipped, so the routine can be re-run
    to resume.

    Args:
        soft: software name; used as both the SQLite file stem and table name.
            NOTE(review): it is interpolated into SQL as an identifier, which
            cannot be parameterized -- only pass trusted names.
        port: TCP port of the local Neo4j REST endpoint.
    """
    neo4j_db = Graph("http://127.0.0.1:%d/db/data/" % port)
    sql_db = sqlite3.connect("/home/bert/Documents/data/" + soft + ".db")
    try:
        sql_db.execute('''create table if not exists %s(
            func_id INT PRIMARY KEY,
            func_name CHAR(100) NOT NULL,
            file CHAR(200) NOT NULL,
            var_map TEXT NOT NULL,
            ast_type_const TEXT NOT NULL,
            ast_type_only TEXT NOT NULL,
            ast_const_only TEXT NOT NULL,
            ast_no_type_const TEXT NOT NULL,
            no_mapping TEXT NOT NULL)''' % soft)
        sql_db.commit()

        funcs = get_all_functions(neo4j_db)
        # Drops an empty marker file named after the function count in the
        # current directory; the handle is closed right away instead of leaked.
        open("" + len(funcs).__str__(), "w").close()
        print("get all functions OK: %s" % len(funcs))

        for func in funcs:
            # Skip functions that were already stored by a previous run.
            existing = sql_db.execute(
                "select * from %s where func_id=?" % soft, (func._id, ))
            if existing.fetchone():
                continue

            print("[%s] processing %s " % (
                datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S"),
                func.properties[u'name']))

            try:
                ast_root = get_function_ast_root(neo4j_db, func)
                func_file = get_function_file(neo4j_db, func)
                ser = serializedAST(neo4j_db)
                serialized = ser.genSerilizedAST(ast_root)
                var_map = ser.variable_maps
                ast_columns = [";".join(serialized[idx]) for idx in range(5)]
            except Exception:
                traceback.print_exc()
                # Without this, the insert below would run with undefined or
                # previous-iteration values and store wrong data.
                continue

            try:
                sql_db.execute(
                    'insert into %s values(?, ?, ?, ?, ?, ?, ?, ?,?)' % soft,
                    (func._id, func.properties[u'name'], func_file,
                     var_map.__str__(), ast_columns[0], ast_columns[1],
                     ast_columns[2], ast_columns[3], ast_columns[4]))
                sql_db.commit()
            except Exception as e:
                print(e)
    finally:
        sql_db.close()
Exemple #4
0
def patch_segement_comp(db1, vuln_func, db2, patch_segement, suffix_tree_obj):
    """Check whether a patch segment's serialized AST occurs in a function's AST.

    ``patch_segement`` (from ``db2``) and ``vuln_func`` (from ``db1``) are each
    serialized under four ``serializedAST`` flag combinations.  The leading
    ``FunctionDef(n);CompoundStatement(n);`` prefix is removed from every
    pattern, then each pattern is searched in the matching serialization of
    ``vuln_func``.

    Returns:
        ``(report, cost)`` where ``report`` maps the four variant names to
        booleans and ``cost`` is the elapsed wall time in seconds, rounded to
        two decimals.
    """
    started = time.time()

    # Report key together with the two serializedAST constructor flags.
    variants = (
        ('distinct_type_and_const', True, True),
        ('distinct_const_no_type', False, True),
        ('distinct_type_no_const', True, False),
        ('no_type_no_const', False, False),
    )

    # Per the original author's note: element 0 of the serializer's result is
    # the AST string, which ends with ';' -- hence the [:-1].
    patterns = [
        serializedAST(db2, flag_a, flag_b).genSerilizedAST(patch_segement)[0][:-1]
        for _, flag_a, flag_b in variants
    ]

    # Strip the FunctionDef and CompoundStatement nodes wrapping the segment.
    wrapper_prefix = re.compile(r"^FunctionDef\([0-9]+\);CompoundStatement\([0-9]+\);")
    patterns = [wrapper_prefix.sub("", pattern) for pattern in patterns]

    subjects = [
        serializedAST(db1, flag_a, flag_b).genSerilizedAST(vuln_func)[0][:-1]
        for _, flag_a, flag_b in variants
    ]

    report = {}
    for (key, _, _), subject, pattern in zip(variants, subjects, patterns):
        report[key] = bool(suffix_tree_obj.search(subject, pattern))

    cost = round(time.time() - started, 2)
    return report, cost
def func_similarity_segement_level(db1, funcs, db2, func_name, db_table):
    """Search a code-segment pattern in many functions and store results in SQLite.

    The function ``func_name`` is looked up in the ``db2`` graph and serialized
    into four patterns (first two entries of each sequence dropped).  Each
    entry of ``funcs`` is looked up in the ``db1`` graph, serialized, and
    searched for the patterns; one row per function is inserted into table
    ``db_table`` of ``/home/bert/Documents/data/soft_test.db``.  Failures in
    the search/insert step are appended to ``suffix_tree_error.log``.

    Args:
        db1: connection argument for ``Graph`` -- database to be compared.
        funcs: iterable of indexable records; ``[0]`` is used for the AST
            lookup and stored as ``func_id``, ``[1]`` as ``func_name``,
            ``[2]`` as ``file``.
        db2: connection argument for ``Graph`` -- code-segment database.
        func_name: name of the function made up of the code segment.
        db_table: name of the result table.
            NOTE(review): interpolated into SQL as an identifier -- only pass
            trusted names.
    """
    neo4j_db1 = Graph(db1)
    neo4j_db2 = Graph(db2)
    suffix_tree_obj = suffixtree()

    report_keys = ('distinct_type_and_const', 'distinct_const_no_type',
                   'distinct_type_no_const', 'no_type_no_const')

    db_conn = sqlite3.connect("/home/bert/Documents/data/soft_test.db")
    try:
        db_conn.execute("""create table if not exists %s(
        func_id INT PRIMARY KEY,
        func_name CHAR(100) NOT NULL,
        file CHAR(200) NOT NULL,
        vuln_segement CHAR(100) NOT NULL,
        distinct_type_and_const BOOLEAN,
        distinct_const_no_type BOOLEAN,
        distinct_type_no_const BOOLEAN,
        no_type_no_const BOOLEAN)""" % db_table)
        db_conn.commit()

        target_func = get_function_ast_root(neo4j_db2, func_name)
        if target_func is None:
            print("%s is not found" % func_name)
            return

        ret = serializedAST(neo4j_db2).genSerilizedAST(target_func)
        patterns = [";".join(ret[idx][2:]) for idx in range(4)]
        query = "insert into %s values(?,?,?,?,?,?,?,?)" % db_table

        for func in funcs:
            print("[%s] processing %s VS %s" % (
                datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S"),
                func[1], func_name))

            ast_root = get_function_ast_root(neo4j_db1, func[0])
            if ast_root is None:
                print("function not found: %s %s" % (func[0], func[1]))
                # Previously fell through and serialized a missing root.
                continue

            tmp = serializedAST(neo4j_db1).genSerilizedAST(ast_root)
            subjects = [";".join(tmp[idx]) for idx in range(4)]

            try:
                report = {}
                for key, subject, pattern in zip(report_keys, subjects, patterns):
                    report[key] = bool(suffix_tree_obj.search(subject, pattern))

                db_conn.execute(query, (
                    func[0], func[1], func[2], func_name,
                    report['distinct_type_and_const'],
                    report['distinct_const_no_type'],
                    report['distinct_type_no_const'],
                    report['no_type_no_const']))
                db_conn.commit()
            except Exception as e:
                # One entry per line so consecutive log records do not run
                # together; the handle is closed even if a write fails.
                with open("suffix_tree_error.log", "a") as log_file:
                    log_file.writelines([
                        datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S")
                        + " " + e.__str__() + "\n",
                        subjects[0] + "\n",
                        patterns[0] + "\n",
                    ])
    finally:
        db_conn.close()
Exemple #6
0
def vuln_patch_compare(conn, neo4jdb, vuln_info, worksheet, suffix_tree_obj):
    """Compare the vulnerable and patched versions of one CVE function.

    The two functions are looked up in ``neo4jdb`` under the names
    ``<CVEID>_VULN_<func>`` and ``<CVEID>_PATCHED_<func>`` (CVE id upper-cased
    with ``-`` replaced by ``_``).  The vulnerable function is serialized into
    four patterns, stripped of the leading FunctionDef/CompoundStatement
    prefix, and each is searched in the matching serialization of the patched
    function.  Exactly one row built by ``process_line`` is appended to
    ``worksheet``: a not-found status row, or a ``"success"`` row carrying the
    report and the elapsed time.
    """
    cve_info = vuln_info.get_cve_info(conn)
    stamp = datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S")
    print("[%s] processing %s" % (stamp, cve_info.cveid))

    name_stem = cve_info.cveid.replace("-", "_").upper()
    vuln_name = name_stem + "_VULN_" + vuln_info.vuln_func
    patch_name = name_stem + "_PATCHED_" + vuln_info.vuln_func

    started = time.time()

    vuln_func = get_function_ast_root(neo4jdb, vuln_name)
    if vuln_func is None:
        row = process_line(conn, vuln_info, "vuln_func_not_found", None, 0)
        worksheet.append(row)
        return

    patched_func = get_function_ast_root(neo4jdb, patch_name)
    if patched_func is None:
        row = process_line(conn, vuln_info, "patched_func_not_found", None, 0)
        worksheet.append(row)
        return

    # Report key together with the two serializedAST constructor flags.
    variants = (
        ('distinct_type_and_const', True, True),
        ('distinct_const_no_type', False, True),
        ('distinct_type_no_const', True, False),
        ('no_type_no_const', False, False),
    )

    # Per the original author's note: element 0 of the serializer's result is
    # the AST string, which ends with ';' -- hence the [:-1].
    patterns = [
        serializedAST(neo4jdb, flag_a, flag_b).genSerilizedAST(vuln_func)[0][:-1]
        for _, flag_a, flag_b in variants
    ]

    # Remove the FunctionDef and CompoundStatement nodes at the front.
    wrapper_prefix = re.compile(r"^FunctionDef\([0-9]+\);CompoundStatement\([0-9]+\);")
    patterns = [wrapper_prefix.sub("", pattern) for pattern in patterns]

    subjects = [
        serializedAST(neo4jdb, flag_a, flag_b).genSerilizedAST(patched_func)[0][:-1]
        for _, flag_a, flag_b in variants
    ]

    report = {}
    for (key, _, _), subject, pattern in zip(variants, subjects, patterns):
        report[key] = bool(suffix_tree_obj.search(subject, pattern))

    cost = round(time.time() - started, 2)
    row = process_line(conn, vuln_info, "success", report, cost)
    worksheet.append(row)
Exemple #7
0
def segement_ast_similarity_process(vuln_name, patch_name, neo4jdb,
                                    org_func_name, type_mapping, worksheet,
                                    suffix_tree_obj):
    """Search a vulnerable segment's AST in a patched function and log a row.

    Both functions are looked up in ``neo4jdb``.  If either is missing, a row
    with status ``"vuln_func_not_found"`` / ``"patch_func_not_found"`` and
    ``"-"`` placeholders is appended to ``worksheet``.  Otherwise the
    vulnerable function is serialized into four patterns (first two entries
    of each sequence dropped) and each is searched in the corresponding
    serialization of the patched function.

    Appended row layout:
        ``(vuln_name, patch_name, status, distinct_type_and_const,
        distinct_const_no_type, distinct_type_no_const, no_type_no_const,
        cost_seconds, org_func_name, str(type_mapping))``

    Args:
        vuln_name: name of the vulnerable function/segment in the graph.
        patch_name: name of the patched function in the graph.
        neo4jdb: graph database handle passed to the helpers.
        org_func_name: original function name, copied into the row.
        type_mapping: assigned to the serializer's ``data_type_mapping``.
        worksheet: object exposing ``append(row)``.
        suffix_tree_obj: object exposing ``search(text, pattern)``.
    """
    start_time = time.time()
    print("[%s] processing %s" % (
        datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S"),
        vuln_name + " vs " + patch_name))

    # Make sure the vulnerable function exists in the database.
    vuln_func = get_function_ast_root(neo4jdb, vuln_name)
    if vuln_func is None:
        worksheet.append((vuln_name, patch_name, "vuln_func_not_found",
                          "-", "-", "-", "-", 0, org_func_name,
                          str(type_mapping)))
        return

    # Make sure the patched function exists in the database.
    patched_func = get_function_ast_root(neo4jdb, patch_name)
    if patched_func is None:
        worksheet.append((vuln_name, patch_name, "patch_func_not_found",
                          "-", "-", "-", "-", 0, org_func_name,
                          str(type_mapping)))
        return

    o1 = serializedAST(neo4jdb)
    o1.data_type_mapping = type_mapping

    report_keys = ('distinct_type_and_const', 'distinct_const_no_type',
                   'distinct_type_no_const', 'no_type_no_const')

    ret = o1.genSerilizedAST(vuln_func)
    # [2:] drops the FunctionDef and CompoundStatement nodes.
    patterns = [";".join(ret[idx][2:]) for idx in range(4)]

    tmp = o1.genSerilizedAST(patched_func)
    # Each pattern is searched in the serialization of the same variant;
    # previously all four searches used tmp[0].
    subjects = [";".join(tmp[idx]) for idx in range(4)]

    report = {}
    for key, subject, pattern in zip(report_keys, subjects, patterns):
        report[key] = bool(suffix_tree_obj.search(subject, pattern))

    end_time = time.time()
    cost = round(end_time - start_time, 2)
    line = (vuln_name, patch_name, "success",
            report['distinct_type_and_const'],
            report['distinct_const_no_type'],
            report['distinct_type_no_const'],
            report['no_type_no_const'], cost, org_func_name,
            str(type_mapping))

    worksheet.append(line)
Exemple #8
0
def search_vuln_seg_in_func(db1, vuln_seg, vuln_func, var_map, db2, func_name, suffix_obj):
    """Search a vulnerable segment's serialized AST inside another function.

    The segment is looked up in ``db1`` by ``vuln_seg``, falling back to
    ``vuln_func``; the function to scan is looked up in ``db2`` by
    ``func_name``.  Five serialized sequences of each are compared (first two
    entries of every sequence dropped on both sides).

    Returns:
        ``(vuln_seg + "-" + vuln_func, func_name, "vuln_not_found")`` when the
        segment cannot be found, ``(vuln_seg, func_name, "patch_not_found")``
        when the scanned function cannot be found, otherwise
        ``(vuln_seg, func_name, "success", r1, r2, r3, r4, r5)`` with the five
        boolean search results.
    """
    stamp = datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S")
    print("[%s] processing %s VS %s" % (stamp, vuln_seg, func_name))

    vuln_root = get_function_ast_root(db1, vuln_seg)
    if vuln_root is None:
        # Fall back to the whole vulnerable function.
        vuln_root = get_function_ast_root(db1, vuln_func)
        if vuln_root is None:
            print("%s  %s not found" % (vuln_seg, vuln_func))
            return (vuln_seg+"-"+vuln_func, func_name, "vuln_not_found")

    patched_root = get_function_ast_root(db2, func_name)
    if patched_root is None:
        print("%s is not found" % func_name)
        return (vuln_seg, func_name, "patch_not_found")

    result_keys = ('distinct_type_and_const', 'distinct_const_no_type',
                   'distinct_type_no_const', 'no_type_no_const', 'no_mapping')

    vuln_serializer = serializedAST(db1)
    vuln_serializer.variable_maps = var_map
    vuln_sequences = vuln_serializer.genSerilizedAST(vuln_root)
    # [2:] removes the FunctionDef and CompoundStatement nodes.
    patterns = [";".join(vuln_sequences[idx][2:]) for idx in range(5)]

    patched_sequences = serializedAST(db2).genSerilizedAST(patched_root)
    subjects = [";".join(patched_sequences[idx][2:]) for idx in range(5)]

    report = {}
    for key, subject, pattern in zip(result_keys, subjects, patterns):
        report[key] = bool(suffix_obj.search(subject, pattern))

    return (vuln_seg, func_name, "success") + tuple(
        report[key] for key in result_keys)
Exemple #9
0
def segement_ast_similarity_process(
    vuln_name, patch_name, neo4jdb, org_func_name, type_mapping, worksheet, suffix_tree_obj
):
    """Search a vulnerable segment's AST in a patched function and log a row.

    Both functions are looked up in ``neo4jdb``.  If either is missing, a row
    with status ``"vuln_func_not_found"`` / ``"patch_func_not_found"`` and
    ``"-"`` placeholders is appended to ``worksheet``.  Otherwise the
    vulnerable function is serialized into four patterns (first two entries
    of each sequence dropped) and each is searched in the corresponding
    serialization of the patched function.

    Appended row layout:
        ``(vuln_name, patch_name, status, distinct_type_and_const,
        distinct_const_no_type, distinct_type_no_const, no_type_no_const,
        cost_seconds, org_func_name, str(type_mapping))``

    Args:
        vuln_name: name of the vulnerable function/segment in the graph.
        patch_name: name of the patched function in the graph.
        neo4jdb: graph database handle passed to the helpers.
        org_func_name: original function name, copied into the row.
        type_mapping: assigned to the serializer's ``data_type_mapping``.
        worksheet: object exposing ``append(row)``.
        suffix_tree_obj: object exposing ``search(text, pattern)``.
    """
    start_time = time.time()
    print(
        "[%s] processing %s"
        % (
            datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S"),
            vuln_name + " vs " + patch_name,
        )
    )

    def _not_found_row(status):
        # Row emitted when one of the two functions is missing from the graph.
        return (
            vuln_name,
            patch_name,
            status,
            "-",
            "-",
            "-",
            "-",
            0,
            org_func_name,
            str(type_mapping),
        )

    # Make sure the vulnerable function exists in the database.
    vuln_func = get_function_ast_root(neo4jdb, vuln_name)
    if vuln_func is None:
        worksheet.append(_not_found_row("vuln_func_not_found"))
        return

    # Make sure the patched function exists in the database.
    patched_func = get_function_ast_root(neo4jdb, patch_name)
    if patched_func is None:
        worksheet.append(_not_found_row("patch_func_not_found"))
        return

    o1 = serializedAST(neo4jdb)
    o1.data_type_mapping = type_mapping

    report_keys = (
        "distinct_type_and_const",
        "distinct_const_no_type",
        "distinct_type_no_const",
        "no_type_no_const",
    )

    ret = o1.genSerilizedAST(vuln_func)
    # [2:] drops the FunctionDef and CompoundStatement nodes.
    patterns = [";".join(ret[idx][2:]) for idx in range(4)]

    tmp = o1.genSerilizedAST(patched_func)
    # Each pattern is searched in the serialization of the same variant;
    # previously all four searches used tmp[0].
    subjects = [";".join(tmp[idx]) for idx in range(4)]

    report = {}
    for key, subject, pattern in zip(report_keys, subjects, patterns):
        report[key] = bool(suffix_tree_obj.search(subject, pattern))

    end_time = time.time()
    cost = round(end_time - start_time, 2)
    line = (
        vuln_name,
        patch_name,
        "success",
        report["distinct_type_and_const"],
        report["distinct_const_no_type"],
        report["distinct_type_no_const"],
        report["no_type_no_const"],
        cost,
        org_func_name,
        str(type_mapping),
    )

    worksheet.append(line)
Exemple #10
0
def search_vuln_seg_in_func(db1, vuln_seg, vuln_func, var_map, db2, func_name,
                            suffix_obj):
    """Search a vulnerable segment's serialized AST inside another function.

    The segment is looked up in ``db1`` by ``vuln_seg``, falling back to
    ``vuln_func``; the function to scan is looked up in ``db2`` by
    ``func_name``.  Five serialized sequences of each are compared (first two
    entries of every sequence dropped on both sides).

    Returns:
        ``(vuln_seg + "-" + vuln_func, func_name, "vuln_not_found")`` when the
        segment cannot be found, ``(vuln_seg, func_name, "patch_not_found")``
        when the scanned function cannot be found, otherwise
        ``(vuln_seg, func_name, "success", r1, r2, r3, r4, r5)`` with the five
        boolean search results.
    """
    timestamp = datetime.datetime.now().strftime("%y-%m-%d %H:%M:%S")
    print("[%s] processing %s VS %s" % (timestamp, vuln_seg, func_name))

    segment_root = get_function_ast_root(db1, vuln_seg)
    if segment_root is None:
        # Fall back to the whole vulnerable function.
        segment_root = get_function_ast_root(db1, vuln_func)
        if segment_root is None:
            print("%s  %s not found" % (vuln_seg, vuln_func))
            return (vuln_seg + "-" + vuln_func, func_name, "vuln_not_found")

    scanned_root = get_function_ast_root(db2, func_name)
    if scanned_root is None:
        print("%s is not found" % func_name)
        return (vuln_seg, func_name, "patch_not_found")

    outcome_names = ('distinct_type_and_const', 'distinct_const_no_type',
                     'distinct_type_no_const', 'no_type_no_const',
                     'no_mapping')

    segment_serializer = serializedAST(db1)
    segment_serializer.variable_maps = var_map
    segment_sequences = segment_serializer.genSerilizedAST(segment_root)
    # [2:] removes the FunctionDef and CompoundStatement nodes.
    needles = [";".join(segment_sequences[pos][2:]) for pos in range(5)]

    scanned_sequences = serializedAST(db2).genSerilizedAST(scanned_root)
    haystacks = [";".join(scanned_sequences[pos][2:]) for pos in range(5)]

    report = {}
    for name, haystack, needle in zip(outcome_names, haystacks, needles):
        report[name] = bool(suffix_obj.search(haystack, needle))

    return (vuln_seg, func_name, "success") + tuple(
        report[name] for name in outcome_names)